diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl index e7558a98479d2..65fd9cf485738 100644 --- a/base/strings/annotated.jl +++ b/base/strings/annotated.jl @@ -25,6 +25,17 @@ and a value (`Any`), paired together as a `Pair{Symbol, <:Any}`. Labels do not need to be unique, the same region can hold multiple annotations with the same label. +Code written for `AnnotatedString`s in general should conserve the following +properties: +- Which characters an annotation is applied to +- The order in which annotations are applied to each character + +Additional semantics may be introduced by specific uses of `AnnotatedString`s. + +A corollary of these rules is that adjacent, consecutively placed, annotations +with identical labels and values are equivalent to a single annotation spanning +the combined range. + See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref), [`annotations`](@ref), and [`annotate!`](@ref). @@ -273,36 +284,6 @@ annotatedstring(c::AnnotatedChar) = AnnotatedString(s::SubString{<:AnnotatedString}) = annotatedstring(s) -""" - annotatedstring_optimize!(str::AnnotatedString) - -Merge contiguous identical annotations in `str`. 
# Repeat an `AnnotatedString` `r` times, carrying its annotations over to the
# result.
#
# - `r == 0` returns `one(AnnotatedString)`; `r == 1` returns `str` itself.
# - When `str` has no annotations, the result has none either.
# - When every annotation covers the whole of `str`, each one is emitted once,
#   covering the whole of the result, rather than once per repetition.
# - Otherwise each annotation is emitted once per repetition, shifted by a
#   multiple of `ncodeunits(str)`, with the relative order preserved.
function repeat(str::AnnotatedString, r::Integer)
    r == 0 && return one(AnnotatedString)
    r == 1 && return str
    # NOTE(review): this line falls between two diff hunks and is reconstructed
    # from the way `unannot` is used below -- confirm against the full file.
    unannot = repeat(str.string, r)
    annotations = Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}()
    len = ncodeunits(str)
    fullregion = firstindex(str):lastindex(str)
    if isempty(str.annotations)
        # Nothing to carry over. This case must be tested first: `allequal` is
        # vacuously true for an empty collection, and the `first(...)` call in
        # the next branch would then throw on the empty annotation vector.
    elseif allequal(first, str.annotations) && first(first(str.annotations)) == fullregion
        # Every annotation spans the entire string, so a single annotation
        # spanning the entire result is equivalent to `r` adjacent copies.
        newfullregion = firstindex(unannot):lastindex(unannot)
        for (_, annot) in str.annotations
            push!(annotations, (newfullregion, annot))
        end
    else
        # General case: replay the annotations, in order, for each repetition.
        for offset in 0:len:(r-1)*len
            for (region, annot) in str.annotations
                push!(annotations, (region .+ offset, annot))
            end
        end
    end
    AnnotatedString(unannot, annotations)
end
""" annotations(s::AnnotatedString) = s.annotations @@ -536,10 +518,19 @@ function write(dest::AnnotatedIOBuffer, src::AnnotatedIOBuffer) nb end +""" + _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int}) + +Erase the presence of `annotations` within a certain `span`. + +This operates by removing all elements of `annotations` that are entirely +contained in `span`, truncating ranges that partially overlap, and splitting +annotations that subsume `span` to just exist either side of `span`. +""" function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, span::UnitRange{Int}) # Clear out any overlapping pre-existing annotations. filter!(((region, _),) -> first(region) < first(span) || last(region) > last(span), annotations) - extras = Tuple{UnitRange{Int}, Pair{Symbol, Any}}[] + extras = Tuple{Int, Tuple{UnitRange{Int}, Pair{Symbol, Any}}}[] for i in eachindex(annotations) region, annot = annotations[i] # Test for partial overlap @@ -550,31 +541,68 @@ function _clear_annotations_in_region!(annotations::Vector{Tuple{UnitRange{Int}, # If `span` fits exactly within `region`, then we've only copied over # the beginning overhang, but also need to conserve the end overhang. 
"""
    _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))

Register new `annotations` in `io`, applying an `offset` to their regions.

This largely consists of simply shifting the regions of `annotations` by `offset`
and pushing them onto `io`'s annotations. However, when it is possible to merge
the new annotations with recent annotations in accordance with the semantics
outlined in [`AnnotatedString`](@ref), we do so. More specifically, when there
is a run of the most recent annotations that are also present as the first
`annotations`, with the same value and adjacent regions, the new annotations are
merged into the existing recent annotations by simply extending their range.

This is implemented so that one can, say, write an `AnnotatedString` to an
`AnnotatedIOBuffer` one character at a time without needlessly producing a
new annotation for each character.

Returns `nothing`; `io.annotations` is modified in place.
"""
function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{UnitRange{Int}, Pair{Symbol, Any}}}, offset::Int = position(io))
    # `run` is the number of leading entries of `annotations` that get merged
    # into the last `run` entries of `io.annotations` (0 means no merging).
    run = 0
    if !isempty(io.annotations) && last(first(last(io.annotations))) == offset
        # Try the longest candidate run first, and take the first valid one.
        for i in reverse(axes(annotations, 1))
            # A run of length `i` pairs the first `i` new annotations with the
            # last `i` existing ones, so it cannot be longer than the existing
            # list. Without this check `io.annotations[end-i+runlen]` below
            # would be indexed at or below zero and throw a `BoundsError`.
            i <= length(io.annotations) || continue
            annot = annotations[i]
            first(first(annot)) == 1 || continue
            if last(annot) == last(last(io.annotations))
                valid_run = true
                for runlen in 1:i
                    new_range, new_annot = annotations[begin+runlen-1]
                    old_range, old_annot = io.annotations[end-i+runlen]
                    # Each pair must be adjacent (old ends at `offset`, new
                    # starts at 1) and carry the same label and value.
                    if last(old_range) != offset || first(new_range) != 1 || old_annot != new_annot
                        valid_run = false
                        break
                    end
                end
                if valid_run
                    run = i
                    break
                end
            end
        end
    end
    # Extend the matched existing annotations to cover the new regions.
    for runindex in 0:run-1
        old_index = lastindex(io.annotations) - run + 1 + runindex
        old_region, annot = io.annotations[old_index]
        new_region, _ = annotations[begin+runindex]
        io.annotations[old_index] = (first(old_region):last(new_region)+offset, annot)
    end
    # Everything after the merged run is appended, shifted by `offset`.
    for index in run+1:lastindex(annotations)
        region, annot = annotations[index]
        start, stop = first(region), last(region)
        push!(io.annotations, (start+offset:stop+offset, annot))
    end
    return nothing
end
+6969fb6d2e8585d26beef865910ec8ef diff --git a/deps/checksums/StyledStrings-ac472083359dde956aed8c61d43b8158ac84d9ce.tar.gz/sha512 b/deps/checksums/StyledStrings-ac472083359dde956aed8c61d43b8158ac84d9ce.tar.gz/sha512 new file mode 100644 index 0000000000000..3d1ac8791e14d --- /dev/null +++ b/deps/checksums/StyledStrings-ac472083359dde956aed8c61d43b8158ac84d9ce.tar.gz/sha512 @@ -0,0 +1 @@ +281292e8478d72ab66b84cbd4f42e5dc2dd5054e8c54a79de8f0c0537d28962b460e67fe71230ead6b02386b87d0423879d51ce53a2b2427ce55866d62d6ebde diff --git a/deps/checksums/StyledStrings-bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b.tar.gz/md5 b/deps/checksums/StyledStrings-bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b.tar.gz/md5 deleted file mode 100644 index 511b60c7e2217..0000000000000 --- a/deps/checksums/StyledStrings-bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -8fc4fd7e90d35e7d8d06a6b7c312ec03 diff --git a/deps/checksums/StyledStrings-bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b.tar.gz/sha512 b/deps/checksums/StyledStrings-bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b.tar.gz/sha512 deleted file mode 100644 index d5e2302499a2c..0000000000000 --- a/deps/checksums/StyledStrings-bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -137360872c9b75276426efa9e9096e442115a554b7e00dc98ce02904fa1a535f76e48ba1366fc517794490a494cccc3238d006ebb43dadb5594e5099a2c36f55 diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version index 8e489a5daf289..81a599f125406 100644 --- a/stdlib/StyledStrings.version +++ b/stdlib/StyledStrings.version @@ -1,4 +1,4 @@ STYLEDSTRINGS_BRANCH = main -STYLEDSTRINGS_SHA1 = bfdb4c3f73a93a956ad48b0f06f89eb1cd40ff6b +STYLEDSTRINGS_SHA1 = ac472083359dde956aed8c61d43b8158ac84d9ce STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1 diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl index 
02325b5b8b2ee..13d463b4c7d21 100644 --- a/test/strings/annotated.jl +++ b/test/strings/annotated.jl @@ -28,10 +28,10 @@ @test Base.AnnotatedString(str[3:4]) == Base.AnnotatedString("me", [(1:2, :thing => 0x01), (1:2, :all => 0x03)]) @test Base.AnnotatedString(str[3:6]) == - Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (1:4, :all => 0x03), (4:4, :other => 0x02)]) - @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)]) + Base.AnnotatedString("me s", [(1:2, :thing => 0x01), (4:4, :other => 0x02), (1:4, :all => 0x03)]) + @test str == Base.AnnotatedString("some string", [(1:4, :thing => 0x01), (6:11, :other => 0x02), (1:11, :all => 0x03)]) @test str != Base.AnnotatedString("some string") - @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (6:6, :other => 0x02), (11:11, :all => 0x03)]) + @test str != Base.AnnotatedString("some string", [(1:1, :thing => 0x01), (1:11, :all => 0x03), (6:6, :other => 0x02)]) @test str != Base.AnnotatedString("some string", [(1:4, :thing => 0x11), (1:11, :all => 0x13), (6:11, :other => 0x12)]) @test str != Base.AnnotatedString("some thingg", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)]) @test Base.AnnotatedString([Base.AnnotatedChar('a', [:a => 1]), Base.AnnotatedChar('b', [:b => 2])]) == @@ -55,15 +55,8 @@ # @test collect(Base.eachstyle(str)) == # [("some", [:thing => 0x01, :all => 0x03]), # (" string", [:all => 0x03, :other => 0x02])] - @test ==(Base.annotatedstring_optimize!( - Base.AnnotatedString("abc", [(1:1, :val => 1), - (2:2, :val => 2), - (2:2, :val => 1), - (3:3, :val => 2)])), - Base.AnnotatedString("abc", [(1:2, :val => 1), - (2:3, :val => 2)])) @test chopprefix(sprint(show, str), "Base.") == - "AnnotatedString{String}(\"some string\", [(1:4, :thing => 0x01), (1:11, :all => 0x03), (6:11, :other => 0x02)])" + "AnnotatedString{String}(\"some string\", [(1:4, :thing => 0x01), (6:11, :other => 0x02), (1:11, :all 
=> 0x03)])" @test eval(Meta.parse(repr(str))) == str @test sprint(show, MIME("text/plain"), str) == "\"some string\"" end @@ -153,8 +146,8 @@ end # Check `annotate!`, including region sorting @test truncate(aio, 0).io.size == 0 @test write(aio, "hello world") == ncodeunits("hello world") - @test Base.annotate!(aio, 7:11, :tag => 2) === aio @test Base.annotate!(aio, 1:5, :tag => 1) === aio + @test Base.annotate!(aio, 7:11, :tag => 2) === aio @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Reading @test read(seekstart(deepcopy(aio.io)), String) == "hello world" @@ -182,24 +175,42 @@ end @test Base.annotations(aio) == [(1:5, :tag => 1), (7:11, :tag => 2)] # Should be unchanged @test write(seek(aio, 0), Base.AnnotatedString("hey-o", [(1:5, :hey => 'o')])) == 5 @test read(seekstart(aio), String) == "hey-o alice" - @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:11, :tag => 2)] # First annotation should have been entirely replaced + @test Base.annotations(aio) == [(7:11, :tag => 2), (1:5, :hey => 'o')] # First annotation should have been entirely replaced @test write(seek(aio, 7), Base.AnnotatedString("bbi", [(1:3, :hey => 'a')])) == 3 # a[lic => bbi]e ('alice' => 'abbie') @test read(seekstart(aio), String) == "hey-o abbie" - @test Base.annotations(aio) == [(1:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)] + @test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (1:5, :hey => 'o'), (8:10, :hey => 'a')] @test write(seek(aio, 0), Base.AnnotatedString("ab")) == 2 # Check first annotation's region is adjusted correctly @test read(seekstart(aio), String) == "aby-o abbie" - @test Base.annotations(aio) == [(3:5, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)] + @test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (3:5, :hey => 'o'), (8:10, :hey => 'a')] @test write(seek(aio, 3), Base.AnnotatedString("ss")) == 2 @test read(seekstart(aio), String) == "abyss abbie" - 
@test Base.annotations(aio) == [(3:3, :hey => 'o'), (7:7, :tag => 2), (8:10, :hey => 'a'), (11:11, :tag => 2)] + @test Base.annotations(aio) == [(7:7, :tag => 2), (11:11, :tag => 2), (3:3, :hey => 'o'), (8:10, :hey => 'a')] # Writing one buffer to another newaio = Base.AnnotatedIOBuffer() @test write(newaio, seekstart(aio)) == 11 @test read(seekstart(newaio), String) == "abyss abbie" @test Base.annotations(newaio) == Base.annotations(aio) @test write(seek(newaio, 5), seek(aio, 5)) == 6 - @test Base.annotations(newaio) == Base.annotations(aio) + @test sort(Base.annotations(newaio)) == sort(Base.annotations(aio)) @test write(newaio, seek(aio, 5)) == 6 @test read(seekstart(newaio), String) == "abyss abbie abbie" - @test Base.annotations(newaio) == vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)]) + @test sort(Base.annotations(newaio)) == sort(vcat(Base.annotations(aio), [(13:13, :tag => 2), (14:16, :hey => 'a'), (17:17, :tag => 2)])) + # The `_insert_annotations!` cautious-merging optimisation + aio = Base.AnnotatedIOBuffer() + @test write(aio, Base.AnnotatedChar('a', [:a => 1, :b => 2])) == 1 + @test Base.annotations(aio) == [(1:1, :a => 1), (1:1, :b => 2)] + @test write(aio, Base.AnnotatedChar('b', [:a => 1, :b => 2])) == 1 + @test Base.annotations(aio) == [(1:2, :a => 1), (1:2, :b => 2)] + let aio2 = copy(aio) # A different start makes merging too risky to do. + @test write(aio2, Base.AnnotatedChar('c', [:a => 0, :b => 2])) == 1 + @test Base.annotations(aio2) == [(1:2, :a => 1), (1:2, :b => 2), (3:3, :a => 0), (3:3, :b => 2)] + end + let aio2 = copy(aio) # Merging some run of the most recent annotations is fine though. + @test write(aio2, Base.AnnotatedChar('c', [:b => 2])) == 1 + @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2)] + end + let aio2 = copy(aio) # ...and any subsequent annotations after a matching run can just be copied over. 
+ @test write(aio2, Base.AnnotatedChar('c', [:b => 2, :c => 3, :d => 4])) == 1 + @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2), (3:3, :c => 3), (3:3, :d => 4)] + end end