# Optimal string distance: counts the minimum number of insertions, deletions,
# substitutions, or transpositions of two adjacent characters needed to go from
# one string to the other.
#
# `lena` and `lenb` are the character counts (`length`) of `a` and `b`; they are
# taken as arguments so that a caller which already has them does not recount.
#
# Returns an integer edit count: 0 for identical strings, and `max(lena, lenb)`
# when one of the strings is empty.
function string_distance(a::AbstractString, lena::Integer, b::AbstractString, lenb::Integer)
    # Make `a` the shorter string; the distance is symmetric, and the work
    # vectors below are sized by the longer one.
    if lena > lenb
        a, b = b, a
        lena, lenb = lenb, lena
    end
    # Count the characters of the common prefix. They never contribute to the
    # distance, so the dynamic program below only runs on what follows them.
    start = 0
    for (ca, cb) in zip(a, b)
        ca == cb || break
        start += 1
    end
    # `a` is a prefix of `b` (this includes an empty `a`): only insertions remain.
    start == lena && return lenb - start
    # `vzero` holds the previous row of the edit-distance table; `vone` holds the
    # diagonal values of the row before that, which the transposition case needs.
    vzero = collect(1:(lenb - start))
    vone = zero(vzero)
    prev_a, prev_b = first(a), first(b)
    current = 0
    for (i, ai) in enumerate(a)
        # Skip the shared prefix, but keep tracking the previous character.
        i > start || (prev_a = ai; continue)
        left = i - start - 1
        current = i - start
        transition_next = 0
        for (j, bj) in enumerate(b)
            j > start || (prev_b = bj; continue)
            col = j - start
            above = current
            this_transition = transition_next
            transition_next = vone[col]
            vone[col] = current = left
            left = vzero[col]
            if ai != bj
                # Minimum between substitution, deletion and insertion
                current = min(current + 1, above + 1, left + 1)
                # Adjacent transposition: only possible once both strings are
                # at least two characters past the shared prefix.
                if i > start + 1 && j > start + 1 && ai == prev_b && prev_a == bj
                    current = min(current, this_transition + 1)
                end
            end
            vzero[col] = current
            prev_b = bj
        end
        prev_a = ai
    end
    return current
end

# Similarity of `needle` and `haystack` as one minus the string distance divided
# by the length of the longer string: 1.0 for identical strings, 0.0 when every
# character position of the longer string has to be edited.
function fuzzyscore(needle::AbstractString, haystack::AbstractString)
    lena, lenb = length(needle), length(haystack)
    longest = max(lena, lenb)
    # Two empty strings are identical; without this guard the division is 0/0 (NaN).
    longest == 0 && return 1.0
    return 1 - string_distance(needle, lena, haystack, lenb) / longest
end

# Return `candidates` ordered from the highest `fuzzyscore` against `search` to
# the lowest. The ascending permutation is reversed as a whole, so candidates
# with equal scores come out in the reverse of their input order.
function fuzzysort(search::String, candidates::Vector{String})
    scores = map(cand -> fuzzyscore(search, cand), candidates)
    return reverse(candidates[sortperm(scores)])
end