Skip to content

Commit

Permalink
Merge 15333d8 into d293264
Browse files Browse the repository at this point in the history
  • Loading branch information
NHDaly committed Oct 24, 2020
2 parents d293264 + 15333d8 commit 6b54fc8
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/DataStructures.jl
Expand Up @@ -43,7 +43,7 @@ module DataStructures
export exclusive, inclusive, semitokens
export orderobject, ordtype, Lt, compare, onlysemitokens

export MultiDict, enumerateall
export MultiDict, MultiSet, enumerateall
export RobinDict
export OrderedRobinDict, isordered
export SwissDict
Expand Down Expand Up @@ -82,6 +82,7 @@ module DataStructures

import .Tokens: IntSemiToken

include("multi_set.jl")
include("multi_dict.jl")
include("sorted_dict.jl")
include("sorted_multi_dict.jl")
Expand Down
4 changes: 4 additions & 0 deletions src/accumulator.jl
Expand Up @@ -67,6 +67,10 @@ Base.iterate(ct::Accumulator, s...) = iterate(ct.map, s...)

# manipulation

# Remove every entry from the accumulator's underlying map, then hand back the accumulator.
function Base.empty!(ct::Accumulator)
    empty!(ct.map)
    return ct
end

# Forward the capacity hint `newsz` to the accumulator's underlying map, then hand back the
# accumulator itself.
function Base.sizehint!(a::Accumulator, newsz)
    sizehint!(a.map, newsz)
    return a
end

"""
inc!(ct::Accumulator, x, [v=1])
Expand Down
155 changes: 155 additions & 0 deletions src/multi_set.jl
@@ -0,0 +1,155 @@
using Base: IteratorEltype, EltypeUnknown, HasEltype, @default_eltype

# TODO:
# - Implement `filter!(f, ::MultiSet)`.
#   (`Base.unsafe_filter!` requires that deleting an element while iterating is safe.
#   We still need to determine whether that holds for `MultiSet` before we can use it.)

# Internal helper inner constructor for MultiSet.
# Only the generic function is declared here (it has no methods yet). Its method is added
# from inside the `MultiSet` struct body below, where `new` is available, so that an existing
# `Accumulator{T,Int}` can be wrapped directly without copying or re-counting.
function _multiset_from_accumulator end

"""
    MultiSet{T}([iter])

Construct a `MultiSet` of the values generated by the given iterable object.

A `MultiSet` is like a `Set`, except that it can contain duplicate entries. Internally each
distinct value is stored once, in an `Accumulator{T,Int}`, together with the number of
times it occurs.

The cost of operations on a `MultiSet` is governed by the underlying `Accumulator`.
NOTE(review): this docstring previously claimed `O(log(n))` "exactly like with a `Set`";
Base `Set` is hash-based (expected `O(1)`), so the figure that applies here should be
confirmed against `Accumulator`'s implementation.

Other common names for a `MultiSet` in mathematics include `multiset`, `bag`, `mset`.
"""
struct MultiSet{T} <: AbstractSet{T}
# Occurrence count for each distinct element.
data::Accumulator{T,Int}

# Adds a method to the module-level `_multiset_from_accumulator` (hence the
# `@__MODULE__().` qualification) from inside the struct body so it can call `new`.
@__MODULE__()._multiset_from_accumulator(data::Accumulator{T,Int}) where T = new{T}(data)
end
# Empty multiset with element type `T`: wraps a freshly constructed `Accumulator{T,Int}`.
function MultiSet{T}() where {T}
    return _multiset_from_accumulator(Accumulator{T,Int}())
end
# Copy constructor: the new multiset wraps a `copy` of `s`'s accumulator rather than the
# accumulator itself.
function MultiSet{T}(s::MultiSet{T}) where {T}
    counts = copy(s.data)
    return _multiset_from_accumulator(counts)
end

# Build a `MultiSet{T}` by inserting every value produced by `items`, one at a time, into an
# initially empty multiset.
function MultiSet{T}(items) where {T}
    result = MultiSet{T}()
    for x in items
        insert!(result, x)
    end
    return result
end

# With no type parameter and no arguments, the element type is `Any`.
function MultiSet()
    return MultiSet{Any}()
end
# Untyped construction from an iterable: dispatch on the iterable's `IteratorEltype` trait
# to decide how the element type is chosen.
function MultiSet(itr)
    trait = IteratorEltype(itr)
    return _MultiSet(itr, trait)
end

# The iterable reports an element type, so use it as the multiset's element type.
function _MultiSet(itr, ::HasEltype)
    return MultiSet{eltype(itr)}(itr)
end

# The iterable does not report an element type. Ask `@default_eltype` for one; if the answer
# is concrete (or `Union{}`), construct a `MultiSet` of that type directly. Otherwise start
# from an empty `MultiSet` of that type and defer to `Base.grow_to!` to fill it.
function _MultiSet(itr, ::EltypeUnknown)
    ElT = @default_eltype(itr)
    if isconcretetype(ElT) || ElT === Union{}
        return MultiSet{ElT}(itr)
    end
    return Base.grow_to!(MultiSet{ElT}(), itr)
end

# Printing logic copied from Base.show(::IO, ::Set):
# - non-empty: `MultiSet(` followed by the elements rendered via `Base.show_vector`, then `)`;
# - empty, and the IO context's `:typeinfo` already equals this type: the bare `MultiSet()`;
# - empty otherwise: the full type followed by `()`.
function Base.show(io::IO, ms::MultiSet)
    if !isempty(ms)
        print(io, "MultiSet(")
        Base.show_vector(io, ms)
        print(io, ')')
    elseif get(io, :typeinfo, Any) == typeof(ms)
        print(io, "MultiSet()")
    else
        show(io, typeof(ms))
        print(io, "()")
    end
    return nothing
end

# Record one more occurrence of `t` by incrementing its entry in the underlying accumulator.
# The value returned is whatever `inc!` returns (not the multiset).
Base.insert!(ms::MultiSet, t) = inc!(ms.data, t)

# Remove a single occurrence of `t` and return the multiset.
# A count above one is decremented; a count of exactly one removes the key altogether so it
# no longer appears in the accumulator; any other count leaves the multiset untouched.
function Base.delete!(ms::MultiSet, t)
    counts = ms.data
    n = counts[t]
    if n > 1
        dec!(counts, t)
    elseif n == 1
        # TODO: We should probably be able to delete a key from an Accumulator?
        delete!(counts.map, t)
    end
    return ms
end

# Add one occurrence of `t`; unlike `insert!`, this returns the multiset itself.
Base.push!(ms::MultiSet, t) = (insert!(ms, t); ms)
# Remove one occurrence of `t` and return `t`.
# Throws `KeyError(t)` when `t` is not in the multiset.
function Base.pop!(ms::MultiSet, t)
    if !(t in ms)
        throw(KeyError(t))
    end
    delete!(ms, t)
    return t
end
# Like `pop!(s, x)`, but returns `default` instead of throwing when `x` is not present.
function Base.pop!(s::MultiSet, x, default)
    if x in s
        return pop!(s, x)
    end
    return default
end
# Remove and return one occurrence of the first element yielded by the underlying
# accumulator. Throws `ArgumentError` when the multiset is empty.
function Base.pop!(ms::MultiSet)
    if isempty(ms)
        throw(ArgumentError("multiset must be non-empty"))
    end
    # TODO: Should Accumulator have a `pop!()`?
    entry = first(ms.data)
    elem = entry[1]
    delete!(ms, elem)
    return elem
end

# `empty` simply defers to `emptymutable`; the element type `U` defaults to `T`.
function Base.empty(ms::MultiSet{T}, ::Type{U}=T) where {T,U}
    return Base.emptymutable(ms, U)
end
# A new, empty `MultiSet` with element type `U` (defaulting to the argument's `T`).
function Base.emptymutable(::MultiSet{T}, ::Type{U}=T) where {T,U}
    return MultiSet{U}()
end
# Empty the underlying accumulator in place and return the (now empty) multiset.
function Base.empty!(ms::MultiSet)
    empty!(ms.data)
    return ms
end

# `copy` defers to `copymutable`.
function Base.copy(ms::MultiSet)
    return Base.copymutable(ms)
end
# Copy by calling the `MultiSet{T}` constructor on the existing multiset.
function Base.copymutable(ms::MultiSet{T}) where {T}
    return MultiSet{T}(ms)
end

# Length of the multiset, computed as `sum` over the underlying accumulator.
function Base.length(ms::MultiSet)
    return sum(ms.data)
end
# `x` is a member exactly when the underlying accumulator has `x` as a key.
function Base.in(x, ms::MultiSet)
    return haskey(ms.data, x)
end

# Forward the capacity hint `newsz` to the underlying accumulator and return the multiset.
function Base.sizehint!(ms::MultiSet, newsz)
    sizehint!(ms.data, newsz)
    return ms
end

# Start iterating. The iteration state is `(elem, remaining, inner)`: the element currently
# being repeated, how many more times it still has to be yielded, and the state of the
# iteration over the underlying accumulator.
function Base.iterate(ms::MultiSet)
    next = iterate(ms.data)
    next === nothing && return nothing
    (elem, remaining), inner = next
    return (elem, (elem, remaining - 1, inner))
end
# Continue iterating from `state = (elem, remaining, inner)`.
# While `remaining` is non-zero the same element is yielded again; once it reaches zero the
# iteration over the underlying accumulator is advanced to the next (element, count) entry.
function Base.iterate(ms::MultiSet, state)
    elem, remaining, inner = state
    if remaining !== 0
        # Still repeating the current element.
        return (elem, (elem, remaining - 1, inner))
    end
    # Finished repeating `elem`; move on to the next entry, if there is one.
    next = iterate(ms.data, inner)
    next === nothing && return nothing
    (elem, remaining), inner = next
    return (elem, (elem, remaining - 1, inner))
end

# Multiset union differs from the generic `AbstractSet` union because the counts have to be
# combined by taking their maximum. From Wikipedia (https://en.wikipedia.org/wiki/Multiset):
#   Union: the union (called, in some contexts, the maximum or lowest common multiple) of
#   A and B is the multiset C with multiplicity function
#       m_C(x) = max(m_A(x),m_B(x)) ∀x ∈ U
function Base.union(ms1::MultiSet{T}, ms2::MultiSet{T}) where {T}
    # union(::Accumulator, ::Accumulator) takes max of the counts of the two Accumulators
    merged = union(ms1.data, ms2.data)
    return _multiset_from_accumulator(merged)
end

# Multiset intersection differs from the generic `AbstractSet` intersection because the
# counts have to be combined by taking their minimum.
# From Wikipedia (https://en.wikipedia.org/wiki/Multiset):
#   Intersection: the intersection (called, in some contexts, the infimum or greatest
#   common divisor) of A and B is the multiset C with multiplicity function
#       m_C(x) = min(m_A(x),m_B(x)) ∀x ∈ U
function Base.intersect(ms1::MultiSet{T}, ms2::MultiSet{T}) where {T}
    # intersect(::Accumulator, ::Accumulator) takes min of the counts of the two Accumulators
    common = intersect(ms1.data, ms2.data)
    return _multiset_from_accumulator(common)
end

# In-place union: apply `union!` to the two underlying accumulators (mutating `ms1`'s) and
# return `ms1`.
Base.union!(ms1::MultiSet{T}, ms2::MultiSet{T}) where {T} =
    (union!(ms1.data, ms2.data); ms1)
# In-place intersection: apply `intersect!` to the two underlying accumulators (mutating
# `ms1`'s) and return `ms1`.
Base.intersect!(ms1::MultiSet{T}, ms2::MultiSet{T}) where {T} =
    (intersect!(ms1.data, ms2.data); ms1)

# Multiset difference: apply `setdiff` to the two underlying accumulators and wrap the
# resulting accumulator in a new `MultiSet`. Neither argument is modified by this wrapper.
function Base.setdiff(ms1::MultiSet{T}, ms2::MultiSet{T}) where {T}
    remaining = setdiff(ms1.data, ms2.data)
    return _multiset_from_accumulator(remaining)
end

0 comments on commit 6b54fc8

Please sign in to comment.