From 5c6245e2dc4261a1ce0d5c7732ee239496add8be Mon Sep 17 00:00:00 2001 From: TEC Date: Sat, 27 Apr 2024 18:35:01 +0800 Subject: [PATCH] Make _insert_annotations! optimising This is a rather important optimisation, since it prevents the annotation blow-up that can result from say writing to an AnnotatedIOBuffer char-by-char. Originally I was just going to pass the AnnotatedString produced when reading the AnnotatedIOBuffer through annotatedstring_optimize!, but now that's been removed, this seems like the best past forwards (it's also actually a better approach than applying annotatedstring_optimize!, just hard to justify when that code already existed). --- base/strings/annotated.jl | 42 ++++++++++++++++++++++++++++++++++++--- test/strings/annotated.jl | 18 +++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index 5baf6f41d2d9c..47a858a0ebcb8 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -509,9 +509,45 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, end function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io)) - for (region, annot) in annotations - region = first(region)+offset:last(region)+offset - push!(io.annotations, (region, annot)) + # The most basic (but correct) approach would be just to push + # each of `annotations` to `io.annotations`, adjusting the region by + # `offset`. However, there is a specific common case probably worth + # optimising, which is when an existing styles are just extended. + # To handle this efficiently and conservatively, we look to see if + # there's a run at the end of `io.annotations` that matches annotations + # at the start of `annotations`. If so, this run of annotations is merged. + run = 0 + if !isempty(io.annotations) && last(first(last(io.annotations))) == offset + for i in reverse(axes(annotations, 1)) + annot = annotations[i] + first(first(annot)) == 1 || continue + if last(annot) == last(last(io.annotations)) + valid_run = true + for runlen in 1:i + new_range, new_annot = annotations[begin+runlen-1] + old_range, old_annot = io.annotations[end-i+runlen] + if last(old_range) != offset || first(new_range) != 1 || old_annot != new_annot + valid_run = false + break + end + end + if valid_run + run = i + break + end + end + end + end + for runindex in 0:run-1 + old_index = lastindex(io.annotations) - run + 1 + runindex + old_region, annot = io.annotations[old_index] + new_region, _ = annotations[begin+runindex] + io.annotations[old_index] = (first(old_region):last(new_region)+offset, annot) + end + for index in run+1:lastindex(annotations) + region, annot = annotations[index] + start, stop = first(region), last(region) + push!(io.annotations, (start+offset:stop+offset, annot)) end end diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl index a1bf9f1b3ee24..f16c2bec348ca 100644 --- a/test/strings/annotated.jl +++ b/test/strings/annotated.jl @@ -191,4 +191,22 @@ end @test write(newaio, seek(aio, 5)) == 6 @test read(seekstart(newaio), String) == "abyss abbie abbie" @test sort(Base.annotations(newaio)) == sort(vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])) + # The `_insert_annotations!` cautious-merging optimisation + aio = Base.AnnotatedIOBuffer() + @test write(aio, Base.AnnotatedChar('a', [:a => 1, :b => 2])) == 1 + @test Base.annotations(aio) == [(1:1, :a => 1), (1:1, :b => 2)] + @test write(aio, Base.AnnotatedChar('b', [:a => 1, :b => 2])) == 1 + @test Base.annotations(aio) == [(1:2, :a => 1), (1:2, :b => 2)] + let aio2 = copy(aio) # A different start makes merging too risky to do. + @test write(aio2, Base.AnnotatedChar('c', [:a => 0, :b => 2])) == 1 + @test Base.annotations(aio2) == [(1:2, :a => 1), (1:2, :b => 2), (3:3, :a => 0), (3:3, :b => 2)] + end + let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though. + @test write(aio2, Base.AnnotatedChar('c', [:b => 2])) == 1 + @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2)] + end + let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over. + @test write(aio2, Base.AnnotatedChar('c', [:b => 2, :c => 3, :d => 4])) == 1 + @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2), (3:3, :c => 3), (3:3, :d => 4)] + end end