Skip to content

Commit

Permalink
Make _insert_annotations! optimising
Browse files Browse the repository at this point in the history
This is a rather important optimisation, since it prevents the
annotation blow-up that can result from say writing to an
AnnotatedIOBuffer char-by-char. Originally I was just going to pass the
AnnotatedString produced when reading the AnnotatedIOBuffer through
annotatedstring_optimize!, but now that's been removed, this seems like
the best past forwards (it's also actually a better approach than
applying annotatedstring_optimize!, just hard to justify when that code
already existed).
  • Loading branch information
tecosaur committed Apr 30, 2024
1 parent 2656884 commit 2b7d9d8
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 3 deletions.
42 changes: 39 additions & 3 deletions base/strings/annotated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -493,9 +493,45 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int},
end

function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
for (region, annot) in annotations
region = first(region)+offset:last(region)+offset
push!(io.annotations, (region, annot))
# The most basic (but correct) approach would be just to push
# each of `annotations` to `io.annotations`, adjusting the region by
# `offset`. However, there is a specific common case probably worth
# optimising, which is when an existing styles are just extended.
# To handle this efficiently and conservatively, we look to see if
# there's a run at the end of `io.annotations` that matches annotations
# at the start of `annotations`. If so, this run of annotations is merged.
run = 0
if !isempty(io.annotations) && last(first(last(io.annotations))) == offset
for i in reverse(axes(annotations, 1))
annot = annotations[i]
first(first(annot)) == 1 || continue
if last(annot) == last(last(io.annotations))
valid_run = true
for runlen in 1:i
new_range, new_annot = annotations[begin+runlen-1]
old_range, old_annot = io.annotations[end-i+runlen]
if last(old_range) != offset || first(new_range) != 1 || old_annot != new_annot
valid_run = false
break
end
end
if valid_run
run = i
break
end
end
end
end
for runindex in 0:run-1
old_index = lastindex(io.annotations) - run + 1 + runindex
old_region, annot = io.annotations[old_index]
new_region, _ = annotations[begin+runindex]
io.annotations[old_index] = (first(old_region):last(new_region)+offset, annot)
end
for index in run+1:lastindex(annotations)
region, annot = annotations[index]
start, stop = first(region), last(region)
push!(io.annotations, (start+offset:stop+offset, annot))
end
end

Expand Down
18 changes: 18 additions & 0 deletions test/strings/annotated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,4 +187,22 @@ end
@test write(newaio, seek(aio, 5)) == 6
@test read(seekstart(newaio), String) == "abyss abbie abbie"
@test sort(Base.annotations(newaio)) == sort(vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)]))
# The `_insert_annotations!` cautious-merging optimisation
aio = Base.AnnotatedIOBuffer()
@test write(aio, Base.AnnotatedChar('a', [:a => 1, :b => 2])) == 1
@test Base.annotations(aio) == [(1:1, :a => 1), (1:1, :b => 2)]
@test write(aio, Base.AnnotatedChar('b', [:a => 1, :b => 2])) == 1
@test Base.annotations(aio) == [(1:2, :a => 1), (1:2, :b => 2)]
let aio2 = copy(aio) # A different start makes merging too risky to do.
@test write(aio2, Base.AnnotatedChar('c', [:a => 0, :b => 2])) == 1
@test Base.annotations(aio2) == [(1:2, :a => 1), (1:2, :b => 2), (3:3, :a => 0), (3:3, :b => 2)]
end
let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though.
@test write(aio2, Base.AnnotatedChar('c', [:b => 2])) == 1
@test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2)]
end
let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over.
@test write(aio2, Base.AnnotatedChar('c', [:b => 2, :c => 3, :d => 4])) == 1
@test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2), (3:3, :c => 3), (3:3, :d => 4)]
end
end

0 comments on commit 2b7d9d8

Please sign in to comment.