Skip to content

Commit

Permalink
Store maximum probe length in OrderedDicts
Browse files Browse the repository at this point in the history
* When sorting an OrderedDict, it's possible that the probe length will
  exceed the maximum probe length previously defined.  To prevent this,
  store and use the maximum probe length (and let it grow beyond the
  previously fixed length if needed).
* Also now reuses deleted slots instead of skipping them.  Previously,
  deleted slots were not reclaimed until the dictionary was rehashed.
  • Loading branch information
kmsquire committed Jan 30, 2021
1 parent 346f55a commit d3facfd
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 14 deletions.
62 changes: 48 additions & 14 deletions src/ordered_dict.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# These can be changed, to trade off better performance for space
const global maxallowedprobe = isdefined(Base, :maxallowedprobe) ? Base.maxallowedprobe : 16
const global maxprobeshift = isdefined(Base, :maxprobeshift) ? Base.maxprobeshift : 6

# OrderedDict

"""
Expand All @@ -11,10 +15,11 @@ mutable struct OrderedDict{K,V} <: AbstractDict{K,V}
keys::Array{K,1}
vals::Array{V,1}
ndel::Int
maxprobe::Int
dirty::Bool

function OrderedDict{K,V}() where {K,V}
new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, false)
new{K,V}(zeros(Int32,16), Vector{K}(), Vector{V}(), 0, 0, false)
end
function OrderedDict{K,V}(kv) where {K,V}
h = OrderedDict{K,V}()
Expand All @@ -37,7 +42,7 @@ mutable struct OrderedDict{K,V} <: AbstractDict{K,V}
rehash!(d)
end
@assert d.ndel == 0
new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0)
new{K,V}(copy(d.slots), copy(d.keys), copy(d.vals), 0, d.maxprobe, false)
end
end
OrderedDict() = OrderedDict{Any,Any}()
Expand Down Expand Up @@ -104,6 +109,10 @@ function convert(::Type{OrderedDict{K,V}}, d::AbstractDict) where {K,V}
end
convert(::Type{OrderedDict{K,V}},d::OrderedDict{K,V}) where {K,V} = d

isslotempty(slot_value::Integer) = slot_value == 0
isslotfilled(slot_value::Integer) = slot_value > 0
isslotmissing(slot_value::Integer) = slot_value < 0

function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
olds = h.slots
keys = h.keys
Expand All @@ -122,6 +131,7 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
end

slots = zeros(Int32, newsz)
maxprobe = 0

if h.ndel > 0
ndel0 = h.ndel
Expand All @@ -139,22 +149,24 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
isdeleted = false
if !ptrs
iter = 0
maxprobe = max(16, sz>>6)
index = (hashk & (sz-1)) + 1
while iter <= maxprobe
while iter <= h.maxprobe
si = olds[index]
#si == 0 && break # shouldn't happen
si == from && break
si == -from && (isdeleted=true; break)
# if we find si == 0, then the key was deleted and it's slot was reused/overwritten
(si == -from || si == 0) && (isdeleted = true; break)
index = (index & (sz-1)) + 1
iter += 1
end
iter > h.maxprobe && (isdeleted = true) # Another case where the slot was reused/overwritten
end
if !isdeleted
index = (hashk & (newsz-1)) + 1
index0 = index = (hashk & (newsz-1)) + 1
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
probe = (index - index0) & (newsz-1)
probe > maxprobe && (maxprobe = probe)
slots[index] = to
newkeys[to] = k
newvals[to] = vals[from]
Expand All @@ -172,10 +184,12 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
else
@inbounds for i = 1:count0
k = keys[i]
index = hashindex(k, newsz)
index0 = index = hashindex(k, newsz)
while slots[index] != 0
index = (index & (newsz-1)) + 1
end
probe = (index - index0) & (newsz-1)
probe > maxprobe && (maxprobe = probe)
slots[index] = i
if h.ndel > 0
# if items are removed by finalizers, retry
Expand All @@ -185,6 +199,7 @@ function rehash!(h::OrderedDict{K,V}, newsz = length(h.slots)) where {K,V}
end

h.slots = slots
h.maxprobe = maxprobe
return h
end

Expand Down Expand Up @@ -216,14 +231,14 @@ function ht_keyindex(h::OrderedDict{K,V}, key, direct) where {K,V}
slots = h.slots
sz = length(slots)
iter = 0
maxprobe = max(16, sz>>6)
maxprobe = h.maxprobe
index = hashindex(key, sz)
keys = h.keys

@inbounds while iter <= maxprobe
si = slots[index]
si == 0 && break
if si > 0 && isequal(key, keys[si])
isslotempty(si) && break
if isslotfilled(si) && isequal(key, keys[si])
return ifelse(direct, oftype(index, si), index)
end

Expand All @@ -241,22 +256,41 @@ function ht_keyindex2(h::OrderedDict{K,V}, key) where {K,V}
slots = h.slots
sz = length(slots)
iter = 0
maxprobe = max(16, sz>>6)
maxprobe = h.maxprobe
index = hashindex(key, sz)
keys = h.keys
avail = 0

@inbounds while iter <= maxprobe
si = slots[index]
if si == 0
if isslotempty(si)
avail < 0 && return avail
return -index
elseif si > 0 && isequal(key, keys[si])
end

if isslotmissing(si)
avail == 0 && (avail = -index)
elseif isequal(key, keys[si])
return oftype(index, si)
end

index = (index & (sz-1)) + 1
iter += 1
end

avail < 0 && return avail

# If key is not present, may need to keep searching to find slot
maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
@inbounds while iter < maxallowed
if !isslotfilled(slots[index])
h.maxprobe = iter
return -index
end
index = (index & (sz-1)) + 1
iter += 1
end

rehash!(h, length(h) > 64000 ? sz*2 : sz*4)

return ht_keyindex2(h, key)
Expand Down
15 changes: 15 additions & 0 deletions test/test_ordered_dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -435,4 +435,19 @@ using OrderedCollections, Test
@test eltype(OrderedDict(tuple(String => String, SubString => SubString))) == Pair{Type,Type}
end

@testset "Issue #71" begin
od = OrderedDict(Dict(i=>0 for i=1:158))
sort!(od)
@test od[158] == 0
end

@testset "Issue #71b" begin
# This is actually a simplified version of #60, which was triggered while fixing #71
# It doesn't actually fail on previous versions of OrderedCollections
od = OrderedDict{Int,Int}(13=>13)
delete!( od, 13 )
od[14]=14
@test od[14] == 14
end

end # @testset OrderedDict

0 comments on commit d3facfd

Please sign in to comment.