From fcc6d17b66f87ef701a4dfb6ac8a3af343644fdf Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Sun, 16 Sep 2018 06:25:50 -0500 Subject: [PATCH] Use OrderedCollections Closes #392 --- Project.toml | 4 + REQUIRE | 1 + src/DataStructures.jl | 9 +- src/dict_sorting.jl | 25 --- src/dict_support.jl | 5 +- src/ordered_dict.jl | 437 ------------------------------------------ src/ordered_set.jl | 112 ----------- 7 files changed, 9 insertions(+), 584 deletions(-) delete mode 100644 src/dict_sorting.jl delete mode 100644 src/ordered_dict.jl delete mode 100644 src/ordered_set.jl diff --git a/Project.toml b/Project.toml index efb4354cc..6db8c3913 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ version = "0.12.0" [deps] InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" @@ -13,3 +14,6 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [targets] test = ["Test", "Primes", "Random", "Serialization"] + +[compat] +OrderedCollections = "1.0.1" diff --git a/REQUIRE b/REQUIRE index 859ad4616..2d93685dd 100644 --- a/REQUIRE +++ b/REQUIRE @@ -1 +1,2 @@ julia 0.7 +OrderedCollections 1.0.1 diff --git a/src/DataStructures.jl b/src/DataStructures.jl index bfdf2d388..ea49a4f95 100644 --- a/src/DataStructures.jl +++ b/src/DataStructures.jl @@ -16,7 +16,8 @@ module DataStructures eachindex, keytype, valtype import Base: iterate - using InteractiveUtils: methodswith + using OrderedCollections + import OrderedCollections: filter, filter!, isordered export complement, complement! @@ -67,10 +68,8 @@ module DataStructures include("disjoint_set.jl") include("heaps.jl") - include("dict_support.jl") - include("ordered_dict.jl") - include("ordered_set.jl") include("default_dict.jl") + include("dict_support.jl") include("trie.jl") include("int_set.jl") @@ -88,8 +87,6 @@ module DataStructures include("tokens2.jl") include("container_loops.jl") - include("dict_sorting.jl") - export CircularBuffer, capacity, diff --git a/src/dict_sorting.jl b/src/dict_sorting.jl deleted file mode 100644 index b08dd9e75..000000000 --- a/src/dict_sorting.jl +++ /dev/null @@ -1,25 +0,0 @@ -# Sort for dicts -import Base: sort, sort! - -function sort!(d::OrderedDict; byvalue::Bool=false, args...) - if d.ndel > 0 - rehash!(d) - end - - if byvalue - p = sortperm(d.vals; args...) - else - p = sortperm(d.keys; args...) - end - d.keys = d.keys[p] - d.vals = d.vals[p] - rehash!(d) - return d -end - -sort(d::OrderedDict; args...) = sort!(copy(d); args...) -sort(d::Dict; args...) = sort!(OrderedDict(d); args...) -## Uncomment these after #224 is merged -# sort!(d::DefaultOrderedDict; args...) = (sort!(d.d.d; args...); d) -# sort(d::DefaultDict; args...) = DefaultOrderedDict(d.d.default, sort(d.d.d; args...)) -# sort(d::DefaultOrderedDict; args...) = DefaultOrderedDict(d.d.default, sort!(copy(d.d.d); args...)) diff --git a/src/dict_support.jl b/src/dict_support.jl index 29fdf335a..6a95ff1fd 100644 --- a/src/dict_support.jl +++ b/src/dict_support.jl @@ -1,9 +1,6 @@ # support functions -# _tablesz and hashindex are defined in Base, but are not exported, -# so they are redefined here. -_tablesz(x::Integer) = x < 16 ? 16 : one(x)<<((sizeof(x)<<3)-leading_zeros(x-1)) -hashindex(key, sz) = (reinterpret(Int,(hash(key))) & (sz-1)) + 1 +using InteractiveUtils: methodswith function not_iterator_of_pairs(kv) return any(x->isempty(methodswith(typeof(kv), x, true)), diff --git a/src/ordered_dict.jl b/src/ordered_dict.jl deleted file mode 100644 index 74d05c717..000000000 --- a/src/ordered_dict.jl +++ /dev/null @@ -1,437 +0,0 @@ -# OrderedDict - -import Base: haskey, get, get!, getkey, delete!, push!, pop!, empty!, - setindex!, getindex, length, isempty, - keys, values, setdiff, setdiff!, - union, union!, intersect, filter, filter!, - hash, eltype, ValueIterator, convert, copy, - merge, empty - -""" - OrderedDict - -`OrderedDict`s are simply dictionaries whose entries have a particular order. The order -refers to insertion order, which allows deterministic iteration over the dictionary or set. -""" -mutable struct OrderedDict{K,V} <: AbstractDict{K,V} - slots::Array{Int32,1} - keys::Array{K,1} - vals::Array{V,1} - ndel::Int - dirty::Bool - - function OrderedDict{K,V}() where {K,V} - new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, false) - end - function OrderedDict{K,V}(kv) where {K,V} - h = OrderedDict{K,V}() - for (k,v) in kv - h[k] = v - end - return h - end - OrderedDict{K,V}(p::Pair) where {K,V} = setindex!(OrderedDict{K,V}(), p.second, p.first) - function OrderedDict{K,V}(ps::Pair...) where {K,V} - h = OrderedDict{K,V}() - sizehint!(h, length(ps)) - for p in ps - h[p.first] = p.second - end - return h - end - function OrderedDict{K,V}(d::OrderedDict{K,V}) where {K,V} - if d.ndel > 0 - rehash!(d) - end - @assert d.ndel == 0 - new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0) - end -end -OrderedDict() = OrderedDict{Any,Any}() -OrderedDict(kv::Tuple{}) = OrderedDict() -copy(d::OrderedDict) = OrderedDict(d) - - -# TODO: this can probably be simplified using `eltype` as a THT (Tim Holy trait) -# OrderedDict{K,V}(kv::Tuple{Vararg{Tuple{K,V}}}) = OrderedDict{K,V}(kv) -# OrderedDict{K }(kv::Tuple{Vararg{Tuple{K,Any}}}) = OrderedDict{K,Any}(kv) -# OrderedDict{V }(kv::Tuple{Vararg{Tuple{Any,V}}}) = OrderedDict{Any,V}(kv) -OrderedDict(kv::Tuple{Vararg{Pair{K,V}}}) where {K,V} = OrderedDict{K,V}(kv) -OrderedDict(kv::Tuple{Vararg{Pair{K}}}) where {K} = OrderedDict{K,Any}(kv) -OrderedDict(kv::Tuple{Vararg{Pair{K,V} where K}}) where {V} = OrderedDict{Any,V}(kv) -OrderedDict(kv::Tuple{Vararg{Pair}}) = OrderedDict{Any,Any}(kv) - -OrderedDict(kv::AbstractArray{Tuple{K,V}}) where {K,V} = OrderedDict{K,V}(kv) -OrderedDict(kv::AbstractArray{Pair{K,V}}) where {K,V} = OrderedDict{K,V}(kv) -OrderedDict(kv::AbstractDict{K,V}) where {K,V} = OrderedDict{K,V}(kv) - -OrderedDict(ps::Pair{K,V}...) where {K,V} = OrderedDict{K,V}(ps) -OrderedDict(ps::Pair{K}...,) where {K} = OrderedDict{K,Any}(ps) -OrderedDict(ps::(Pair{K,V} where K)...,) where {V} = OrderedDict{Any,V}(ps) -OrderedDict(ps::Pair...) = OrderedDict{Any,Any}(ps) - -function OrderedDict(kv) - try - dict_with_eltype(kv, eltype(kv)) - catch e - if any(x->isempty(methods(x, (typeof(kv),))), [iterate]) || - !all(x->isa(x,Union{Tuple,Pair}),kv) - throw(ArgumentError("Dict(kv): kv needs to be an iterator of tuples or pairs")) - else - rethrow(e) - end - end -end - -dict_with_eltype(kv, ::Type{Tuple{K,V}}) where {K,V} = OrderedDict{K,V}(kv) -dict_with_eltype(kv, ::Type{Pair{K,V}}) where {K,V} = OrderedDict{K,V}(kv) -dict_with_eltype(kv, t) = OrderedDict{Any,Any}(kv) - -empty(d::OrderedDict{K,V}) where {K,V} = OrderedDict{K,V}() -@deprecate similar(d::OrderedDict) empty(d) - -length(d::OrderedDict) = length(d.keys) - d.ndel -isempty(d::OrderedDict) = (length(d)==0) - -""" - isordered(::Type) - -Property of associative containers, that is `true` if the container type has a -defined order (such as `OrderedDict` and `SortedDict`), and `false` otherwise. -""" -isordered(::Type{T}) where {T<:AbstractDict} = false -isordered(::Type{T}) where {T<:OrderedDict} = true - -# conversion between OrderedDict types -function convert(::Type{OrderedDict{K,V}}, d::AbstractDict) where {K,V} - if !isordered(typeof(d)) - Base.depwarn("Conversion to OrderedDict is deprecated for unordered associative containers (in this case, $(typeof(d))). Use an ordered or sorted associative type, such as SortedDict and OrderedDict.", :convert) - end - h = OrderedDict{K,V}() - for (k,v) in d - ck = convert(K,k) - if !haskey(h,ck) - h[ck] = convert(V,v) - else - error("key collision during dictionary conversion") - end - end - return h -end -convert(::Type{OrderedDict{K,V}},d::OrderedDict{K,V}) where {K,V} = d - -function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V} - olds = h.slots - keys = h.keys - vals = h.vals - sz = length(olds) - newsz = _tablesz(newsz) - h.dirty = true - count0 = length(h) - if count0 == 0 - resize!(h.slots, newsz) - fill!(h.slots, 0) - resize!(h.keys, 0) - resize!(h.vals, 0) - h.ndel = 0 - return h - end - - slots = zeros(Int32,newsz) - - if h.ndel > 0 - ndel0 = h.ndel - ptrs = !isbitstype(K) - to = 1 - # TODO: to get the best performance we need to avoid reallocating these. - # This algorithm actually works in place, unless the dict is modified - # due to GC during this process. - newkeys = similar(keys, count0) - newvals = similar(vals, count0) - @inbounds for from = 1:length(keys) - if !ptrs || isassigned(keys, from) - k = keys[from] - hashk = hash(k)%Int - isdeleted = false - if !ptrs - iter = 0 - maxprobe = max(16, sz>>6) - index = (hashk & (sz-1)) + 1 - while iter <= maxprobe - si = olds[index] - #si == 0 && break # shouldn't happen - si == from && break - si == -from && (isdeleted=true; break) - index = (index & (sz-1)) + 1 - iter += 1 - end - end - if !isdeleted - index = (hashk & (newsz-1)) + 1 - while slots[index] != 0 - index = (index & (newsz-1)) + 1 - end - slots[index] = to - newkeys[to] = k - newvals[to] = vals[from] - to += 1 - end - if h.ndel != ndel0 - # if items are removed by finalizers, retry - return rehash!(h, newsz) - end - end - end - h.keys = newkeys - h.vals = newvals - h.ndel = 0 - else - @inbounds for i = 1:count0 - k = keys[i] - index = hashindex(k, newsz) - while slots[index] != 0 - index = (index & (newsz-1)) + 1 - end - slots[index] = i - if h.ndel > 0 - # if items are removed by finalizers, retry - return rehash!(h, newsz) - end - end - end - - h.slots = slots - return h -end - -function sizehint!(d::OrderedDict, newsz) - slotsz = (newsz*3)>>1 - oldsz = length(d.slots) - if slotsz <= oldsz - # todo: shrink - # be careful: rehash!() assumes everything fits. it was only designed - # for growing. - return d - end - # grow at least 25% - slotsz = max(slotsz, (oldsz*5)>>2) - rehash!(d, slotsz) -end - -function empty!(h::OrderedDict{K,V}) where {K,V} - fill!(h.slots, 0) - empty!(h.keys) - empty!(h.vals) - h.ndel = 0 - h.dirty = true - return h -end - -# get the index where a key is stored, or -1 if not present -function ht_keyindex(h::OrderedDict{K,V}, key, direct) where {K,V} - slots = h.slots - sz = length(slots) - iter = 0 - maxprobe = max(16, sz>>6) - index = hashindex(key, sz) - keys = h.keys - - @inbounds while iter <= maxprobe - si = slots[index] - si == 0 && break - if si > 0 && isequal(key, keys[si]) - return ifelse(direct, oftype(index, si), index) - end - - index = (index & (sz-1)) + 1 - iter+=1 - end - - return -1 -end - -# get the index where a key is stored, or -pos if not present -# and the key would be inserted at pos -# This version is for use by setindex! and get! -function ht_keyindex2(h::OrderedDict{K,V}, key) where {K,V} - slots = h.slots - sz = length(slots) - iter = 0 - maxprobe = max(16, sz>>6) - index = hashindex(key, sz) - keys = h.keys - - @inbounds while iter <= maxprobe - si = slots[index] - if si == 0 - return -index - elseif si > 0 && isequal(key, keys[si]) - return oftype(index, si) - end - - index = (index & (sz-1)) + 1 - iter+=1 - end - - rehash!(h, length(h) > 64000 ? sz*2 : sz*4) - - return ht_keyindex2(h, key) -end - -function _setindex!(h::OrderedDict, v, key, index) - hk, hv = h.keys, h.vals - #push!(h.keys, key) - ccall(:jl_array_grow_end, Cvoid, (Any, UInt), hk, 1) - nk = length(hk) - @inbounds hk[nk] = key - #push!(h.vals, v) - ccall(:jl_array_grow_end, Cvoid, (Any, UInt), hv, 1) - @inbounds hv[nk] = v - @inbounds h.slots[index] = nk - h.dirty = true - - sz = length(h.slots) - cnt = nk - h.ndel - # Rehash now if necessary - if h.ndel >= ((3*nk)>>2) || cnt*3 > sz*2 - # > 3/4 deleted or > 2/3 full - rehash!(h, cnt > 64000 ? cnt*2 : cnt*4) - end -end - -function setindex!(h::OrderedDict{K,V}, v0, key0) where {K,V} - key = convert(K,key0) - if !isequal(key,key0) - throw(ArgumentError("$key0 is not a valid key for type $K")) - end - v = convert(V, v0) - - index = ht_keyindex2(h, key) - - if index > 0 - @inbounds h.keys[index] = key - @inbounds h.vals[index] = v - else - _setindex!(h, v, key, -index) - end - - return h -end - -function get!(h::OrderedDict{K,V}, key0, default) where {K,V} - key = convert(K,key0) - if !isequal(key,key0) - throw(ArgumentError("$key0 is not a valid key for type $K")) - end - - index = ht_keyindex2(h, key) - - index > 0 && return h.vals[index] - - v = convert(V, default) - _setindex!(h, v, key, -index) - return v -end - -function get!(default::Base.Callable, h::OrderedDict{K,V}, key0) where {K,V} - key = convert(K,key0) - if !isequal(key,key0) - throw(ArgumentError("$key0 is not a valid key for type $K")) - end - - index = ht_keyindex2(h, key) - - index > 0 && return h.vals[index] - - h.dirty = false - v = convert(V, default()) - if h.dirty - index = ht_keyindex2(h, key) - end - if index > 0 - h.keys[index] = key - h.vals[index] = v - else - _setindex!(h, v, key, -index) - end - return v -end - -function getindex(h::OrderedDict{K,V}, key) where {K,V} - index = ht_keyindex(h, key, true) - return (index<0) ? throw(KeyError(key)) : h.vals[index]::V -end - -function get(h::OrderedDict{K,V}, key, default) where {K,V} - index = ht_keyindex(h, key, true) - return (index<0) ? default : h.vals[index]::V -end - -function get(default::Base.Callable, h::OrderedDict{K,V}, key) where {K,V} - index = ht_keyindex(h, key, true) - return (index<0) ? default() : h.vals[index]::V -end - -haskey(h::OrderedDict, key) = (ht_keyindex(h, key, true) >= 0) -in(key, v::Base.KeySet{K,T}) where {K,T<:OrderedDict{K}} = (ht_keyindex(v.dict, key, true) >= 0) - -function getkey(h::OrderedDict{K,V}, key, default) where {K,V} - index = ht_keyindex(h, key, true) - return (index<0) ? default : h.keys[index]::K -end - -function _pop!(h::OrderedDict, index) - @inbounds val = h.vals[h.slots[index]] - _delete!(h, index) - return val -end - -function pop!(h::OrderedDict) - h.ndel > 0 && rehash!(h) - key = h.keys[end] - index = ht_keyindex(h, key, false) - key => _pop!(h, index) -end - -function pop!(h::OrderedDict, key) - index = ht_keyindex(h, key, false) - index > 0 ? _pop!(h, index) : throw(KeyError(key)) -end - -function pop!(h::OrderedDict, key, default) - index = ht_keyindex(h, key, false) - index > 0 ? _pop!(h, index) : default -end - -function _delete!(h::OrderedDict, index) - @inbounds ki = h.slots[index] - @inbounds h.slots[index] = -ki - ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.keys, ki-1) - ccall(:jl_arrayunset, Cvoid, (Any, UInt), h.vals, ki-1) - h.ndel += 1 - h.dirty = true - h -end - -function delete!(h::OrderedDict, key) - index = ht_keyindex(h, key, false) - if index > 0; _delete!(h, index); end - h -end - -function iterate(t::OrderedDict, i = (t.ndel > 0 && rehash!(t); 1)) - i > length(t.keys) ? nothing : (Pair(t.keys[i],t.vals[i]), i+1) -end - -function iterate(v::ValueIterator{T}, i::Int) where {T<:OrderedDict} - i > length(v.dict.vals) ? nothing : (v.dict.vals[i], i+1) -end - -function merge(d::OrderedDict, others::AbstractDict...) - K, V = keytype(d), valtype(d) - for other in others - K = promote_type(K, keytype(other)) - V = promote_type(V, valtype(other)) - end - merge!(OrderedDict{K,V}(), d, others...) -end diff --git a/src/ordered_set.jl b/src/ordered_set.jl deleted file mode 100644 index e700e8c7e..000000000 --- a/src/ordered_set.jl +++ /dev/null @@ -1,112 +0,0 @@ -# ordered sets - -# This was largely copied and modified from Base - -# TODO: Most of these functions should be removed once AbstractSet is introduced there -# (see https://github.com/JuliaLang/julia/issues/5533) - -struct OrderedSet{T} - dict::OrderedDict{T,Nothing} - - OrderedSet{T}() where {T} = new{T}(OrderedDict{T,Nothing}()) - OrderedSet{T}(xs) where {T} = union!(new{T}(OrderedDict{T,Nothing}()), xs) -end -OrderedSet() = OrderedSet{Any}() -OrderedSet(xs) = OrderedSet{eltype(xs)}(xs) - - -show(io::IO, s::OrderedSet) = (show(io, typeof(s)); print(io, "("); !isempty(s) && Base.show_comma_array(io, s,'[',']'); print(io, ")")) - -@delegate OrderedSet.dict [isempty, length] - -sizehint!(s::OrderedSet, sz::Integer) = (sizehint!(s.dict, sz); s) -eltype(s::OrderedSet{T}) where {T} = T - -in(x, s::OrderedSet) = haskey(s.dict, x) - -push!(s::OrderedSet, x) = (s.dict[x] = nothing; s) -pop!(s::OrderedSet, x) = (pop!(s.dict, x); x) -pop!(s::OrderedSet, x, deflt) = pop!(s.dict, x, deflt) == deflt ? deflt : x -delete!(s::OrderedSet, x) = (delete!(s.dict, x); s) - -getindex(x::OrderedSet,i::Int) = x.dict.keys[i] -lastindex(x::OrderedSet) = lastindex(x.dict.keys) -Base.nextind(::OrderedSet, i::Int) = i + 1 # Needed on 0.7 to mimic array indexing. -Base.keys(s::OrderedSet) = 1:length(s) - -union!(s::OrderedSet, xs) = (for x in xs; push!(s,x); end; s) -setdiff!(s::OrderedSet, xs) = (for x in xs; delete!(s,x); end; s) -setdiff!(s::Set, xs::OrderedSet) = (for x in xs; delete!(s,x); end; s) - -empty(s::OrderedSet{T}) where {T} = OrderedSet{T}() -@deprecate similar(s::OrderedSet) empty(s) - -copy(s::OrderedSet) = union!(empty(s), s) - -empty!(s::OrderedSet{T}) where {T} = (empty!(s.dict); s) - -function iterate(s::OrderedSet) - state = iterate(s.dict) - state === nothing && return nothing - s, i = state - s[1], i -end -# NOTE: manually optimized to take advantage of OrderedDict representation -iterate(s::OrderedSet, i) = i > length(s.dict.keys) ? nothing : (s.dict.keys[i], i+1) - -pop!(s::OrderedSet) = pop!(s.dict)[1] - -union(s::OrderedSet) = copy(s) -function union(s::OrderedSet, sets...) - u = OrderedSet{Base.promote_eltype(s, sets...)}() - union!(u,s) - for t in sets - union!(u,t) - end - return u -end - -intersect(s::OrderedSet) = copy(s) -function intersect(s::OrderedSet, sets...) - i = copy(s) - for x in s - for t in sets - if !in(x,t) - delete!(i,x) - break - end - end - end - return i -end - -function setdiff(a::OrderedSet, b) - d = empty(a) - for x in a - if !(x in b) - push!(d, x) - end - end - d -end - -==(l::OrderedSet, r::OrderedSet) = (length(l) == length(r)) && (l <= r) -<(l::OrderedSet, r::OrderedSet) = (length(l) < length(r)) && (l <= r) -<=(l::OrderedSet, r::OrderedSet) = issubset(l, r) - -function filter!(f::Function, s::OrderedSet) - for x in s - if !f(x) - delete!(s, x) - end - end - return s -end -filter(f::Function, s::OrderedSet) = filter!(f, copy(s)) - -const orderedset_seed = UInt === UInt64 ? 0x2114638a942a91a5 : 0xd86bdbf1 -function hash(s::OrderedSet, h::UInt) - h = hash(orderedset_seed, h) - s.dict.ndel > 0 && rehash!(s.dict) - hash(s.dict.keys, h) -end