Skip to content

Commit

Permalink
try the ordered Dict representation mentioned in #10092
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson committed Feb 7, 2015
1 parent b3a17a5 commit d875952
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 109 deletions.
223 changes: 118 additions & 105 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -321,16 +321,14 @@ copy(o::ObjectIdDict) = ObjectIdDict(o)
# dict

type Dict{K,V} <: Associative{K,V}
slots::Array{UInt8,1}
slots::Array{Int32,1}
keys::Array{K,1}
vals::Array{V,1}
ndel::Int
count::Int
deleter::Function

function Dict()
n = 16
new(zeros(UInt8,n), Array(K,n), Array(V,n), 0, 0, identity)
new(zeros(Int32,16), Array(K,0), Array(V,0), 0, identity)
end
function Dict(kv)
h = Dict{K,V}()
Expand Down Expand Up @@ -379,6 +377,9 @@ dict_with_eltype(kv, t) = Dict{Any,Any}(kv)

# Build a new, empty Dict with the same key type `K` and value type `V` as `d`.
# The contents of `d` are not consulted; only its type parameters matter.
function similar{K,V}(d::Dict{K,V})
    return Dict{K,V}()
end

# Number of live entries in `d`: every position in the `keys` vector, minus
# the positions recorded as deleted in `ndel`.
function length(d::Dict)
    stored = length(d.keys)
    return stored - d.ndel
end
# True exactly when `d` reports a length of zero.
function isempty(d::Dict)
    return length(d) == 0
end

# conversion between Dict types
function convert{K,V}(::Type{Dict{K,V}},d::Associative)
h = Dict{K,V}()
Expand Down Expand Up @@ -416,99 +417,124 @@ end

# Map `key` to a 1-based starting slot for a table of `sz` slots: the hash is
# reinterpreted as an Int, masked with `sz-1`, and shifted up by one.
# NOTE(review): the masking presumably relies on `sz` being a power of two;
# confirm against `_tablesz`.
function hashindex(key, sz)
    hashed = hash(key) % Int
    mask = sz - 1
    return (hashed & mask) + 1
end

isslotempty(h::Dict, i::Int) = h.slots[i] == 0x0
isslotfilled(h::Dict, i::Int) = h.slots[i] == 0x1
isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x2
# Decide whether the entry at position `ki` of `keys` has been deleted.
#
# For bits-type keys, the probe sequence for `keys[ki]` is walked through
# `slots`:
#   * an empty slot (0) or a slot holding `ki`  -> not deleted (false)
#   * a slot holding `-ki`                      -> deleted (true)
# The walk gives up after `max(16, length(slots)>>6) + 1` probes and then
# answers false.
#
# For all other key types the entry counts as deleted exactly when the
# element `keys[ki]` is not defined.
function isdeleted{K}(keys::Vector{K}, ki, slots)
    # Non-bits keys need no probing.
    isbits(K) || return !isdefined(keys, ki)

    nslots = length(slots)
    probelimit = max(16, nslots>>6)
    pos = hashindex(keys[ki], nslots)
    for attempt = 0:probelimit
        entry = slots[pos]
        if entry == 0 || entry == ki
            return false
        elseif entry == -ki
            return true
        end
        # step to the next slot, wrapping around the table
        pos = (pos & (nslots-1)) + 1
    end
    return false
end

# Rebuild the slot table of `h` with `_tablesz(newsz)` slots and return `h`.
# NOTE(review): this span is a rendered diff hunk; removed and added lines are
# interleaved (for example the two `function rehash!` headers below), so it is
# not runnable as shown. Confirm the final text against the repository.
function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.keys))
function rehash!{K,V}(h::Dict{K,V}, newsz = length(h.slots))
olds = h.slots
oldk = h.keys
oldv = h.vals
keys = h.keys
vals = h.vals
sz = length(olds)
newsz = _tablesz(newsz)
if h.count == 0
count0 = length(h)
# Nothing stored: resize and zero the slots, reset keys/vals and ndel, and
# return early.
if count0 == 0
resize!(h.slots, newsz)
fill!(h.slots, 0)
resize!(h.keys, newsz)
resize!(h.vals, newsz)
resize!(h.keys, 0)
resize!(h.vals, 0)
h.ndel = 0
return h
end

slots = zeros(UInt8,newsz)
keys = Array(K, newsz)
vals = Array(V, newsz)
count0 = h.count
count = 0

for i = 1:sz
if olds[i] == 0x1
k = oldk[i]
v = oldv[i]
slots = zeros(Int32,newsz)

# Deletions pending: every entry that `isdeleted` reports as live is copied
# down to position `to` in keys/vals and given a slot holding `to`; afterwards
# keys/vals are truncated to `to-1` elements.
if h.ndel > 0
to = 1
@inbounds for from = 1:length(keys)
if !isdeleted(keys, from, olds)
# TODO avoid computing hash twice for isbits
k = keys[from]
index = hashindex(k, newsz)
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
slots[index] = to
keys[to] = k
vals[to] = vals[from]
to += 1
end
end
resize!(keys, to-1)
resize!(vals, to-1)
else
# No deletions: each of the first `count0` keys is probed into the new slot
# table. NOTE(review): the `0x1` / `keys[index]` / `vals[index]` / `count`
# stores below look like removed diff lines, with `slots[index] = i` as the
# added one; confirm.
@inbounds for i = 1:count0
k = keys[i]
index = hashindex(k, newsz)
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
slots[index] = 0x1
keys[index] = k
vals[index] = v
count += 1
slots[index] = i
end
end

# TODO restore this piece of logic:
#=
if h.count != count0
# if items are removed by finalizers, retry
return rehash!(h, newsz)
end
end
end

=#
h.slots = slots
h.keys = keys
h.vals = vals
h.count = count
h.ndel = 0
# Give keys/vals a capacity hint of two thirds of the slot count.
est = div(newsz*2, 3)
sizehint!(h.keys, est)
sizehint!(h.vals, est)

return h
end

# Ask `d` to make room for `newsz` entries. The slot count requested is
# `(newsz*3)>>1`; if the existing slot table is already at least that large,
# `d` is returned untouched, otherwise the table is rehashed to the larger of
# that size and 5/4 of the current slot count.
# NOTE(review): rendered diff hunk; the `newsz`-based lines and the
# `slotsz`-based lines look like the removed and added versions of the same
# statements. Confirm against the repository.
function sizehint!(d::Dict, newsz)
slotsz = (newsz*3)>>1
oldsz = length(d.slots)
if newsz <= oldsz
if slotsz <= oldsz
# TODO: shrink the table.
# Be careful: rehash!() assumes everything fits; it was only designed
# for growing.
return d
end
# grow by at least 25%
newsz = max(newsz, (oldsz*5)>>2)
rehash!(d, newsz)
slotsz = max(slotsz, (oldsz*5)>>2)
rehash!(d, slotsz)
end

# Remove every entry from `h` and return `h`: the slot table is zero-filled,
# `keys` and `vals` are emptied, and the deleted-entry counter is reset.
# NOTE(review): rendered diff hunk; the `0x0` fill, the `sz`/`resize!` lines
# and `h.count = 0` look like removed lines. Confirm against the repository.
function empty!{K,V}(h::Dict{K,V})
fill!(h.slots, 0x0)
sz = length(h.slots)
fill!(h.slots, 0)
empty!(h.keys)
empty!(h.vals)
resize!(h.keys, sz)
resize!(h.vals, sz)
h.ndel = 0
h.count = 0
return h
end

# get the index where a key is stored, or -1 if not present
function ht_keyindex{K,V}(h::Dict{K,V}, key)
sz = length(h.keys)
function ht_keyindex{K,V}(h::Dict{K,V}, key, direct)
slots = h.slots
sz = length(slots)
iter = 0
maxprobe = max(16, sz>>6)
index = hashindex(key, sz)
keys = h.keys

while true
if isslotempty(h,index)
break
end
if !isslotmissing(h,index) && isequal(key,keys[index])
return index
@inbounds while true
si = slots[index]
si == 0 && break
if si > 0 && isequal(key, keys[si])
return ifelse(direct, oftype(index, si), index)
end

index = (index & (sz-1)) + 1
Expand All @@ -523,52 +549,48 @@ end
# and the key would be inserted at pos
# This version is for use by setindex! and get!
# Probe the slot table for `key`.
#   * positive result: `key` is already present
#   * negative result: `key` is absent and the negated value is the slot
#     where it can be inserted
# When the probe limit `max(16, sz>>6)` is exceeded without either outcome,
# the table is rehashed to a larger size and the search is retried.
# NOTE(review): rendered diff hunk; the `isslotempty` / `isslotmissing` /
# `avail` lines look like the removed version and the `si`-based lines like
# the added one (which returns `-index` for an empty slot and the stored
# position `si` for a match). Confirm against the repository.
function ht_keyindex2{K,V}(h::Dict{K,V}, key)
sz = length(h.keys)
slots = h.slots
sz = length(slots)
iter = 0
maxprobe = max(16, sz>>6)
index = hashindex(key, sz)
avail = 0
keys = h.keys

while true
if isslotempty(h,index)
avail < 0 && return avail
@inbounds while true
si = slots[index]
if si == 0
return -index
end

if isslotmissing(h,index)
if avail == 0
# found an available slot, but need to keep scanning
# in case "key" already exists in a later collided slot.
avail = -index
end
elseif isequal(key, keys[index])
return index
elseif si > 0 && isequal(key, keys[si])
return oftype(index, si)
end

# advance to the next slot, wrapping around the table
index = (index & (sz-1)) + 1
iter+=1
iter > maxprobe && break
end

avail < 0 && return avail

# Probe limit hit: grow the table (x2 above 64000 entries, x4 otherwise).
rehash!(h, h.count > 64000 ? sz*2 : sz*4)
rehash!(h, length(h) > 64000 ? sz*2 : sz*4)

return ht_keyindex2(h, key)
end

# Store a new `key => v` pair, using slot `index` of the slot table.
# In the `hk`/`hv` lines, `key` and `v` are appended to the end of keys/vals
# (each grown by one element through `jl_array_grow_end`) and the slot records
# the new position `nk`. Afterwards the table is rehashed if too many entries
# are deleted or the table is too full.
# NOTE(review): rendered diff hunk; the direct `h.slots[index] = 0x1`,
# `h.keys[index]`, `h.vals[index]`, `h.count` lines look like the removed
# version. Confirm against the repository.
function _setindex!(h::Dict, v, key, index)
h.slots[index] = 0x1
h.keys[index] = key
h.vals[index] = v
h.count += 1
hk, hv = h.keys, h.vals
#push!(h.keys, key)
ccall(:jl_array_grow_end, Void, (Any, UInt), hk, 1)
nk = length(hk)
@inbounds hk[nk] = key
#push!(h.vals, v)
ccall(:jl_array_grow_end, Void, (Any, UInt), hv, 1)
@inbounds hv[nk] = v
@inbounds h.slots[index] = nk

sz = length(h.keys)
sz = length(h.slots)
# live entries = stored positions minus deleted ones
cnt = nk - h.ndel
# Rehash now if necessary
if h.ndel >= ((3*sz)>>2) || h.count*3 > sz*2
if h.ndel >= ((3*nk)>>2) || cnt*3 > sz*2
# > 3/4 deleted or > 2/3 full
# NOTE(review): in the `cnt`/`sz` variant the requested size is sz*2 or sz*4
# even when the rehash was triggered only by the deleted-entries condition, so
# repeated insert/delete cycles on a small dict appear to enlarge the slot
# table every time. The `h.count`-based variant sized by live entries instead.
# Confirm whether this growth is intended.
rehash!(h, h.count > 64000 ? h.count*2 : h.count*4)
rehash!(h, cnt > 64000 ? sz*2 : sz*4)
end
end

Expand Down Expand Up @@ -632,9 +654,8 @@ macro get!(h, key0, default)
end
idx = ht_keyindex2($(esc(h)), key)
if idx < 0
idx = -idx
v = convert(V, $(esc(default)))
_setindex!($(esc(h)), v, key, idx)
_setindex!($(esc(h)), v, key, -idx)
else
@inbounds v = $(esc(h)).vals[idx]
end
Expand All @@ -644,76 +665,68 @@ end


# Return the value stored for `key` (asserted to be of type `V`); throws
# `KeyError(key)` when the lookup yields a negative index.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function getindex{K,V}(h::Dict{K,V}, key)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, true)
return (index<0) ? throw(KeyError(key)) : h.vals[index]::V
end

# Return the value stored for `key` (asserted to be of type `V`), or `default`
# when the lookup yields a negative index.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function get{K,V}(h::Dict{K,V}, key, default)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, true)
return (index<0) ? default : h.vals[index]::V
end

# Return the value stored for `key` (asserted to be of type `V`); when the
# lookup yields a negative index, call `default()` and return its result.
# `default` is only invoked in the not-found case.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function get{K,V}(default::Callable, h::Dict{K,V}, key)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, true)
return (index<0) ? default() : h.vals[index]::V
end

# Membership tests: `haskey` on a Dict and `in` on a Dict's key iterator both
# report whether the key lookup returns a non-negative index.
# NOTE(review): rendered diff hunk; the two-argument `ht_keyindex` calls look
# like the removed lines and the three-argument calls like the added ones.
# Confirm against the repository.
haskey(h::Dict, key) = (ht_keyindex(h, key) >= 0)
in{T<:Dict}(key, v::KeyIterator{T}) = (ht_keyindex(v.dict, key) >= 0)
haskey(h::Dict, key) = (ht_keyindex(h, key, true) >= 0)
in{T<:Dict}(key, v::KeyIterator{T}) = (ht_keyindex(v.dict, key, true) >= 0)

# Return the key object stored in `h` that matches `key` (asserted to be of
# type `K`), or `default` when the lookup yields a negative index.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function getkey{K,V}(h::Dict{K,V}, key, default)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, true)
return (index<0) ? default : h.keys[index]::K
end

# Read the value belonging to slot `index`, delete that entry via `_delete!`,
# and return the value.
# NOTE(review): rendered diff hunk; `h.vals[index]` looks like the removed
# line and `h.vals[h.slots[index]]` (value position looked up through the
# slot) like the added one. Confirm against the repository.
function _pop!(h::Dict, index)
val = h.vals[index]
val = h.vals[h.slots[index]]
_delete!(h, index)
return val
end

# Remove `key` from `h` and return its value; throws `KeyError(key)` when the
# lookup does not return a positive index.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function pop!(h::Dict, key)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, false)
index > 0 ? _pop!(h, index) : throw(KeyError(key))
end

# Remove `key` from `h` and return its value; returns `default` (leaving `h`
# untouched) when the lookup does not return a positive index.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function pop!(h::Dict, key, default)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, false)
index > 0 ? _pop!(h, index) : default
end

# Mark the entry referenced by slot `index` as deleted and return `h`.
# In the `ki` lines the slot's stored position is read, the slot is
# overwritten with its negation, the keys/vals elements for that position are
# unset through `jl_arrayunset`, and `ndel` is incremented.
# NOTE(review): rendered diff hunk; the `0x2` marker, the `index-1` calls and
# `h.count -= 1` look like removed lines. The `-1` offsets presumably convert
# to the C helper's 0-based indexing. Confirm both against the repository.
function _delete!(h::Dict, index)
h.slots[index] = 0x2
ccall(:jl_arrayunset, Void, (Any, UInt), h.keys, index-1)
ccall(:jl_arrayunset, Void, (Any, UInt), h.vals, index-1)
ki = h.slots[index]
h.slots[index] = -ki
ccall(:jl_arrayunset, Void, (Any, UInt), h.keys, ki-1)
ccall(:jl_arrayunset, Void, (Any, UInt), h.vals, ki-1)
h.ndel += 1
h.count -= 1
h
end

# Delete `key` from `h` if the lookup returns a positive index; a missing key
# is silently ignored. Always returns `h`.
# NOTE(review): rendered diff hunk; the two `index = ...` lines look like the
# removed and added versions of one statement. Confirm against the repository.
function delete!(h::Dict, key)
index = ht_keyindex(h, key)
index = ht_keyindex(h, key, false)
if index > 0; _delete!(h, index); end
h
end

# Iteration protocol (start/done/next) for Dict and its key/value iterators.
# NOTE(review): rendered diff hunk with both sides interleaved; the two
# `function` headers below share a single `end`. The `skip_deleted`-based
# definitions and the `t.count`-based isempty/length look like removed lines;
# the definitions that step with `i+1` look like the added ones. Confirm
# against the repository.

# skip_deleted: advance `i` past slots for which `isslotfilled` is false.
function skip_deleted(h::Dict, i)
L = length(h.slots)
while i<=L && !isslotfilled(h,i)
i += 1
end
return i
# start: rehash first when deletions are pending (rehash! resets ndel to 0),
# then begin at position 1.
function start(t::Dict)
t.ndel > 0 && rehash!(t)
1
end
# done/next walk the keys/vals vectors by position.
done(t::Dict, i) = done(t.keys, i)
next(t::Dict, i) = ((t.keys[i],t.vals[i]), i+1)

start(t::Dict) = skip_deleted(t, 1)
done(t::Dict, i) = done(t.vals, i)
next(t::Dict, i) = ((t.keys[i],t.vals[i]), skip_deleted(t,i+1))

isempty(t::Dict) = (t.count == 0)
length(t::Dict) = t.count

# Key-only and value-only iteration over a Dict.
next{T<:Dict}(v::KeyIterator{T}, i) = (v.dict.keys[i], skip_deleted(v.dict,i+1))
next{T<:Dict}(v::ValueIterator{T}, i) = (v.dict.vals[i], skip_deleted(v.dict,i+1))
next{T<:Dict}(v::KeyIterator{T}, i) = (v.dict.keys[i], i+1)
next{T<:Dict}(v::ValueIterator{T}, i) = (v.dict.vals[i], i+1)

# weak key dictionaries

Expand Down
1 change: 0 additions & 1 deletion base/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ precompile(Base.isequal, (VersionNumber, VersionNumber))
precompile(Base.isequal, (Void, Void))
precompile(Base.isfile, (ASCIIString,))
precompile(Base.ismatch, (Regex, ASCIIString))
precompile(Base.isslotempty, (Dict{Any,Any}, Int))
precompile(Base.istaskdone, (Task,))
precompile(Base.joinpath, (ASCIIString, ASCIIString))
precompile(Base.joinpath, (ASCIIString, ASCIIString, ASCIIString))
Expand Down
Loading

2 comments on commit d875952

@StefanKarpinski
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you like it? Any sense of its performance?

@JeffBezanson
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the pull request, I have some commentary there.

Please sign in to comment.