diff --git a/README.md b/README.md index 48e4750e2..e43190c5c 100644 --- a/README.md +++ b/README.md @@ -167,3 +167,54 @@ h = mutable_binary_maxheap(Int) # create an empty mutable min/max heap h = mutable_binary_minheap([1,4,3,2]) h = mutable_binary_maxheap([1,4,3,2]) # create a mutable min/max heap from a vector ``` + + +## DefaultDict + +A DefaultDict allows specification of a default value to return when a requested key is not in a dictionary. + +The version of ``DefaultDict`` provided here is a wrapper around an ``Associative`` type, which defaults to ``Dict``. All ``Associative`` and ``Dict`` methods are supported. + +Constructors for ``DefaultDict`` include +```julia +DefaultDict(default, d::Associative=Dict()) # create a DefaultDict with a default value or function, + # optionally wrapping an existing dictionary +DefaultDict(KeyType, ValueType, default) # create a DefaultDict with Dict type (KeyType,ValueType) +``` + +Examples using ``DefaultDict``: +```julia +dd = DefaultDict(1) # create an (Any=>Any) DefaultDict with a default value of 1 +dd = DefaultDict(String, Int, 0) # create a (String=>Int) DefaultDict with a default value of 0 + +d = ['a'=>1, 'b'=>2] +dd = DefaultDict(0, d) # provide a default value to an existing dictionary +dd['c'] == 0 # true +d['c'] == 0 # true + +dd = DefaultDict(time) # call time() to provide the default value +dd = DefaultDict(Dict) # Create a dictionary of dictionaries + # Dict() is called to provide the default value +dd = DefaultDict(()->myfunc()) # call function myfunc to provide the default value + +# create a Dictionary of String=>DefaultDict{String, Int}, where the default of the +# inner set of DefaultDicts is zero +dd = DefaultDict(String, DefaultDict, ()->DefaultDict(String,Int,0)) +``` + +Note that in the last example, we need to use a function to create each new ``DefaultDict``. 
+If we forget, we will end up using the same ``DefaultDict`` for all default values: + +```julia +julia> dd = DefaultDict(String, DefaultDict, DefaultDict(String,Int,0)); + +julia> dd["a"] +DefaultDict{String,Int64,Int64,Dict{K,V}}() + +julia> dd["b"]["a"] = 1 +1 + +julia> dd["a"] +["a"=>1] + +``` diff --git a/run_tests.jl b/run_tests.jl index 601994036..7a3d71b7a 100644 --- a/run_tests.jl +++ b/run_tests.jl @@ -4,7 +4,8 @@ tests = ["deque", "classifiedcollections", "disjoint_set", "binheap", - "mutable_binheap"] + "mutable_binheap", + "defaultdict"] for t in tests fp = joinpath("test", "test_$t.jl") diff --git a/src/DataStructures.jl b/src/DataStructures.jl index 5e7ad6f18..923244e6a 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -1,10 +1,10 @@ module DataStructures - import Base.length, Base.isempty, Base.start, Base.next, Base.done - import Base.show, Base.dump, Base.empty!, Base.getindex - import Base.haskey, Base.keys, Base.merge, Base.copy - import Base.push!, Base.pop!, Base.shift!, Base.unshift!, Base.add! - import Base.union! + import Base: length, isempty, start, next, done, + show, dump, empty!, getindex, setindex!, get, + in, haskey, keys, merge, copy, + push!, pop!, shift!, unshift!, add!, + union!, delete!, similar, sizehint export Deque, Stack, Queue export deque, stack, queue, enqueue!, dequeue!, update! @@ -19,6 +19,8 @@ module DataStructures export AbstractHeap, compare, extract_all! 
export BinaryHeap, binary_minheap, binary_maxheap
     export MutableBinaryHeap, mutable_binary_minheap, mutable_binary_maxheap
+
+    export DefaultDict
 
     include("deque.jl")
     include("stack.jl")
@@ -27,4 +29,5 @@ module DataStructures
     include("classifiedcollections.jl")
     include("disjoint_set.jl")
     include("heaps.jl")
+    include("defaultdict.jl")
 end
diff --git a/src/defaultdict.jl b/src/defaultdict.jl
new file mode 100644
index 000000000..746d413e3
--- /dev/null
+++ b/src/defaultdict.jl
@@ -0,0 +1,83 @@
+# Dictionary which returns (and sets) a default value for a requested item not
+# already in the dictionary.
+#
+# Type parameters: K and V are the key and value types; F is the type of the
+# default, which is called to produce each value when it is a Base.Callable and
+# used as-is otherwise; D is the type of the wrapped Associative.
+
+immutable DefaultDict{K,V,F,D<:Associative} <: Associative{K,V}
+    default::F
+    d::D
+
+    DefaultDict(x::F, kv::AbstractArray{(K,V)}) = new(x, D{K,V}(kv))
+    DefaultDict(x::F, d::DefaultDict) = DefaultDict(x, d.d)
+    DefaultDict(x::F, d::D=D{K,V}()) = new(x, d)
+    DefaultDict(x, ks, vs) = new(x, D{K,V}(ks,vs))
+end
+
+DefaultDict() = error("DefaultDict: no default specified")
+DefaultDict(k,v) = error("DefaultDict: no default specified")
+
+# TODO: these mimic similar Dict constructors, but may not be needed
+DefaultDict{K,V,F}(default::F, ks::AbstractArray{K}, vs::AbstractArray{V}) = DefaultDict{K,V,F,Dict}(default,ks,vs)
+DefaultDict{F}(default::F,ks,vs) = DefaultDict{Any,Any,F,Dict}(default, ks, vs)
+
+# syntax entry points
+DefaultDict{F}(default::F) = DefaultDict{Any,Any,F,Dict}(default)
+DefaultDict{K,V,F}(::Type{K}, ::Type{V}, default::F) = DefaultDict{K,V,F,Dict}(default)
+DefaultDict{K,V,F}(default::F, kv::AbstractArray{(K,V)}) = DefaultDict{K,V,F,Dict}(default, kv)
+DefaultDict{F,D<:Associative}(default::F, d::D) = ((K,V)=eltype(d); DefaultDict{K,V,F,D}(default, d))
+
+# Return an empty DefaultDict with the same default. There is no zero-argument
+# constructor, and D may already be fully parameterized (so D{K,V}() cannot be
+# relied on); build the wrapped dictionary with similar(d.d) instead.
+similar{K,V,F,D}(d::DefaultDict{K,V,F,D}) = DefaultDict{K,V,F,D}(d.default, similar(d.d))
+
+# sizehint needs the requested size, so forward every trailing argument.
+sizehint(d::DefaultDict, args...) = sizehint(d.d, args...)
+empty!(d::DefaultDict) = empty!(d.d)
+setindex!(d::DefaultDict, v, k) = setindex!(d.d, v, k)
+
+# Note that getindex depends on the particular implementation of Dict in Base.
+# If the Dict implementation changes, this may break.
+# Also note that we hash twice here if the key is not in the dictionary: once
+# when retrieving, and once when assigning.
+function getindex{K,V,F<:Base.Callable}(d::DefaultDict{K,V,F}, key)
+    index = Base.ht_keyindex(d.d, key)
+    if index < 0
+        d.d[key] = ret = convert(V, d.default())
+        return ret::V
+    end
+    return d.d.vals[index]::V
+end
+
+function getindex{K,V}(d::DefaultDict{K,V}, key)
+    index = Base.ht_keyindex(d.d, key)
+    if index < 0
+        d.d[key] = ret = convert(V, d.default)
+        return ret::V
+    end
+    return d.d.vals[index]::V
+end
+
+get(d::DefaultDict, key, deflt) = get(d.d, key, deflt)
+
+haskey(d::DefaultDict, key) = haskey(d.d, key)
+in{T<:DefaultDict}(key, v::Base.KeyIterator{T}) = key in keys(v.dict.d)
+# getkey is not in the module's `import Base:` list, so qualify it here;
+# an unqualified definition would create a new function instead of a method.
+Base.getkey(d::DefaultDict, key, deflt) = getkey(d.d, key, deflt)
+
+pop!(d::DefaultDict, key) = pop!(d.d, key)
+delete!(d::DefaultDict, key) = delete!(d.d, key)
+
+start(d::DefaultDict) = start(d.d)
+done(d::DefaultDict, i) = done(d.d,i)
+next(d::DefaultDict, i) = next(d.d,i)
+
+isempty(d::DefaultDict) = isempty(d.d)
+length(d::DefaultDict) = length(d.d)
+
+next{T<:DefaultDict}(v::Base.KeyIterator{T}, i) = (v.dict.d.keys[i], Base.skip_deleted(v.dict.d,i+1))
+next{T<:DefaultDict}(v::Base.ValueIterator{T}, i) = (v.dict.d.vals[i], Base.skip_deleted(v.dict.d,i+1))
+
diff --git a/test/test_defaultdict.jl b/test/test_defaultdict.jl
new file mode 100644
index 000000000..eb272b21c
--- /dev/null
+++ b/test/test_defaultdict.jl
@@ -0,0 +1,62 @@
+using DataStructures
+using Base.Test
+
+# construction
+@test_throws DefaultDict()
+@test_throws DefaultDict(String, Int)
+
+# empty dictionary
+d = DefaultDict(Char, Int, 1)
+@test length(d) == 0
+@test isempty(d)
+@test d['c'] == 1
+@test !isempty(d)
+empty!(d)
+@test isempty(d)
+
+# access, modification
+@test (d['a'] += 1) == 2
+@test 'a' in keys(d)
+@test haskey(d, 'a')
+@test get(d, 'b', 0) == 0
+@test !('b' in keys(d))
+@test !haskey(d, 'b')
+@test pop!(d, 'a') == 2
+@test isempty(d)
+
+for c in 'a':'z'
+    d[c] = c-'a'+1
+end
+
+@test d['z'] == 26
+@test d['@'] == 1
+@test length(d) == 27
+delete!(d, '@')
+@test length(d) == 26
+
+for (k,v) in d
+    @test v == k-'a'+1
+end
+
+@test sort(collect(keys(d))) == ['a':'z']
+@test sort(collect(values(d))) == [1:26]
+
+# Starting from an existing dictionary
+e = ['a'=>1, 'b'=>3, 'c'=>5]
+f = DefaultDict(0, e)
+@test_throws e['d']
+@test f['d'] == 0
+f['e'] = 9
+@test e['d'] == 0
+@test e['e'] == 9
+
+# similar, sizehint, getkey
+g = similar(f)
+@test isa(g, DefaultDict)
+@test isempty(g)
+@test g['x'] == 0
+@test !haskey(f, 'x')
+sizehint(g, 100)
+@test length(g) == 1
+@test getkey(f, 'a', 'z') == 'a'
+@test getkey(f, 'q', 'z') == 'z'