Skip to content

Commit

Permalink
Merge 7f252da into 5044489
Browse files Browse the repository at this point in the history
  • Loading branch information
oxinabox committed Jan 8, 2019
2 parents 5044489 + 7f252da commit 4fa9557
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ This package implements a variety of data structures, including
- CircularDeque
- Stack
- Queue
- Accumulators and Counters
- Accumulators and Counters (i.e. Multisets / Bags)
- Disjoint Sets
- Binary Heap
- Mutable Binary Heap
Expand Down
33 changes: 33 additions & 0 deletions docs/src/accumulators.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,36 @@ pop!(a, x) # remove a key x from a, and return its current value
merge(a, a2) # return a new accumulator/counter that combines the
# values/counts in both a and a2
```


`merge` is the multiset sum operation (sometimes written ⊎).

## Use as a MultiSet

An `Accumulator{T, <:Integer}`, such as the one returned by `counter`, is a [multiset](https://en.wikipedia.org/wiki/Multiset) (or bag) of objects of type `T`.
If the count type is not an integer but a more general real number,
then this is a form of fuzzy multiset.
A number of operations are provided for using `Accumulator`s as multisets.


Note that these multiset operations will throw an error if the accumulator has negative or zero counts for any items.

```julia

setdiff(a1, a2) # The opposite of `merge` (i.e. multiset sum),
# Returns `a1` with the count of items in `a2` removed, down to a minimum of zero
# max(a1[v] - a2[v], 0) over all `v` in universe


union(a1, a2) # multiset union (sometimes called maximum, or lowest common multiple)
# returns a new multiset with the counts being the higher of those in `a1` or `a2`.
# max(a1[v], a2[v]) over all `v` in universe

intersect(a1, a2) # multiset intersection (sometimes called infimum or greatest common divisor)
# returns a new multiset with the counts being the lowest of those in `a1` or `a2`.
# Note that this means things not occurring in both will be removed (count zero).
# min(a1[v], a2[v]) over all `v` in universe
```



58 changes: 56 additions & 2 deletions src/accumulator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ function eltype_for_accumulator(seq::T) where {T<:Base.Generator}
end



# Build a new `Accumulator` from a copy of `ct`'s underlying map, so that later
# updates to the result do not touch `ct`'s map.
function copy(ct::Accumulator)
    return Accumulator(copy(ct.map))
end

# Number of entries held in the accumulator's underlying map.
function length(a::Accumulator)
    return length(a.map)
end
Expand Down Expand Up @@ -130,7 +129,7 @@ end
Resets the count of `x` to zero.
Returns its former count.
"""
reset!(ct::Accumulator, x) = pop!(ct.map, x)
# Remove `x` from the underlying map and return the count it had. When `x` is not
# stored, nothing is removed and `zero(V)` is returned instead of raising.
function reset!(ct::Accumulator{<:Any,V}, x) where {V}
    if haskey(ct.map, x)
        return pop!(ct.map, x)
    end
    return zero(V)
end

"""
nlargest(acc::Accumulator, [n])
Expand Down Expand Up @@ -182,4 +181,59 @@ nsmallest(acc::Accumulator, n) = partialsort!(collect(acc), 1:n, by=last, rev=fa
# Deprecated spellings: `pop!` on an accumulator forwards to `reset!`, and `push!` of
# one accumulator into another forwards to `merge!`.
@deprecate pop!(ct::Accumulator, x) reset!(ct, x)
@deprecate push!(ct1::Accumulator, ct2::Accumulator) merge!(ct1,ct2)

###########################################################
## Multiset operations

"""
    MultiplicityException{K,V} <: Exception

Raised by the multiset operations on accumulators when an element does not have a
strictly positive multiplicity.

# Fields
- `k`: the offending element.
- `v`: the multiplicity found for that element.
"""
struct MultiplicityException{K,V} <: Exception
    k::K
    v::V
end

# Human-readable rendering of a `MultiplicityException`: states the multiset
# precondition, then names the offending element and the multiplicity it had.
function Base.showerror(io::IO, err::MultiplicityException)
    print(io, "When using an Accumulator as a Multiset, all elements must have positive multiplicity")
    offender, multiplicity = err.k, err.v
    print(io, " element `$(offender)` has multiplicity $(multiplicity)")
    return nothing
end

# Delete key `k` from the accumulator's underlying map unless its count is strictly
# positive. Returns `true` when the entry is kept, otherwise the result of `delete!`.
function drop_nonpositive!(a::Accumulator, k)
    if a[k] > 0
        return true
    end
    return delete!(a.map, k)
end


"""
    setdiff(a::Accumulator, b::Accumulator)

Multiset difference: return a copy of `a` in which the count of every item of `b` has
been decremented by its count in `b`. Items whose count is no longer positive are
removed from the result. `a` and `b` are left unmodified.

Throws a `MultiplicityException` if `b` holds an item with a non-positive count.
"""
function Base.setdiff(a::Accumulator, b::Accumulator)
    remaining = copy(a)
    for (item, amount) in b
        if !(amount > 0)
            throw(MultiplicityException(item, amount))
        end
        dec!(remaining, item, amount)
        # Counts may have gone to zero or below; such items are not part of the result.
        drop_nonpositive!(remaining, item)
    end
    return remaining
end

# Multiset inclusion: `a` is a subset of `b` when every item of `a` has a count in `b`
# that is at least its count in `a`.
function Base.issubset(a::Accumulator, b::Accumulator)
    for (item, needed) in a
        b[item] >= needed || return false
    end
    return true
end

# Multiset union of three or more accumulators: fold the two-argument `union` from left
# to right over the extra arguments.
function Base.union(a::Accumulator, b::Accumulator, c::Accumulator...)
    return foldl(union, c; init=union(a, b))
end

# Multiset union of two accumulators. Works on a copy, so `a` is not modified.
function Base.union(a::Accumulator, b::Accumulator)
    return union!(copy(a), b)
end
"""
    union!(a::Accumulator, b::Accumulator)

In-place multiset union: for every item of `b`, set its count in `a` to the larger of
its counts in `a` and `b`. Returns `a`.

Throws a `MultiplicityException` if `b` holds an item with a non-positive count.
"""
function Base.union!(a::Accumulator, b::Accumulator)
    for (item, count_b) in b
        if !(count_b > 0)
            throw(MultiplicityException(item, count_b))
        end
        a[item] = max(a[item], count_b)
    end
    return a
end


# Multiset intersection of three or more accumulators: intersect the first two, then
# recurse over the remaining arguments.
Base.intersect(a::Accumulator, b::Accumulator, c::Accumulator...) =
    intersect(intersect(a, b), c...)

# Multiset intersection of two accumulators. Works on a copy, so `a` is not modified.
Base.intersect(a::Accumulator, b::Accumulator) = intersect!(copy(a), b)
"""
    intersect!(a::Accumulator, b::Accumulator)

In-place multiset intersection: every item of `a` ends up with the smaller of its
counts in `a` and `b`, and items that do not occur in both are removed. Returns `a`.

Throws a `MultiplicityException` if `b` holds an item with a non-positive count, or if
such an item is still present in `a` once the items of `b` have been processed.
"""
function Base.intersect!(a::Accumulator, b::Accumulator)
    for (kb, vb) in b
        vb > 0 || throw(MultiplicityException(kb, vb))
        a[kb] = min(a[kb], vb)
        drop_nonpositive!(a, kb) # Drop any that ended up zero
    end
    # Need to do this bidirectionally, as anything not in both needs to be removed.
    # The loop body deletes entries from `a`, so walk a snapshot of its pairs rather
    # than the live collection.
    for (ka, va) in collect(a)
        va > 0 || throw(MultiplicityException(ka, va))
        a[ka] = min(b[ka], va)
        drop_nonpositive!(a, ka) # Drop any that ended up zero
    end
    return a
end
2 changes: 1 addition & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using Serialization

import DataStructures: IntSet

# Guard against method ambiguities involving DataStructures, Base and Core.
@test isempty(detect_ambiguities(Base, Core, DataStructures))
@test [] == detect_ambiguities(Base, Core, DataStructures)

tests = ["int_set",
"deque",
Expand Down
26 changes: 26 additions & 0 deletions test/test_accumulator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,32 @@
@test_throws BoundsError nsmallest(counter("a"),2)
end

# Multiset behaviour of accumulators. Each case spells out the operation by name so the
# assertion really exercises `setdiff` / `union` / `intersect`.
@testset "Multiset" begin
    @testset "setdiff" begin
        @test setdiff(counter([1,2,3]), counter([2, 4])) == counter([3, 1])
        @test setdiff(counter([1,2,3]), counter([2,2,4])) == counter([3, 1])
        @test setdiff(counter([1,2,2,2,3]), counter([2,2,4])) == counter([1,2,3])
    end

    @testset "union" begin
        @test union(counter([1,2,3]), counter([1,2,3])) == counter([1,2,3])
        @test union(counter([1,2,3]), counter([1,2,2,3])) == counter([1,2,2,3])
        @test union(counter([1,3]), counter([2,2])) == counter([1,2,2,3])
        @test union(counter([1,2,3]), counter(Int[])) == counter([1,2,3])
        # More than two arguments go through the varargs method.
        @test union(counter([1]), counter([2]), counter([2,2,3])) == counter([1,2,2,3])
    end

    @testset "intersect" begin
        @test intersect(counter([1,2,3]), counter([1,2,3])) == counter([1,2,3])
        @test intersect(counter([1,2,3]), counter([1,2,2,3])) == counter([1,2,3])
        @test intersect(counter([1,3]), counter([2,2])) == counter(Int[])
        @test intersect(counter([1,2,3]), counter(Int[])) == counter(Int[])
    end
end

end # @testset Accumulators


Expand Down

0 comments on commit 4fa9557

Please sign in to comment.