Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimized (and ordered) IdSet code #52114

Merged
merged 4 commits into from
Nov 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
97 changes: 76 additions & 21 deletions base/idset.jl
@@ -1,36 +1,91 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Like Set, but using IdDict
mutable struct IdSet{T} <: AbstractSet{T}
dict::IdDict{T,Nothing}

IdSet{T}() where {T} = new(IdDict{T,Nothing}())
IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.dict))
# Set whose keys are stored in `list` and located through the hash index `idxs`,
# both of which are maintained by the `jl_idset_*` runtime helpers.
mutable struct IdSet{K} <: AbstractSet{K}
# key storage; a slot may be unassigned (iteration skips such slots)
list::Memory{Any}
# hash index over `list`, read and rebuilt by the `jl_idset_*` ccalls
# NOTE(review): the element width (UInt8/16/32) is presumably chosen by the
# smallintset code from the number of addressable slots — confirm there
idxs::Union{Memory{UInt8}, Memory{UInt16}, Memory{UInt32}}
# number of keys currently stored (this is what `length` reports)
count::Int
# iteration and `empty!` only visit slots 1:max
max::Int # n.b. always <= length(list)
# empty set: zero-length key storage and zero-length index
IdSet{T}() where {T} = new(Memory{Any}(undef, 0), Memory{UInt8}(undef, 0), 0, 0)
# copy constructor: copies both memories so the result does not alias `s`
IdSet{T}(s::IdSet{T}) where {T} = new(copy(s.list), copy(s.idxs), s.count, s.max)
end

# Build an IdSet{T} by adding every element of `itr` to a fresh empty set.
IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr)
# With no type parameter the element type defaults to Any.
IdSet() = IdSet{Any}()

# Both copy flavours go through the copy constructor of the set's own type.
copymutable(s::IdSet) = typeof(s)(s)
# Fresh empty set; the element type defaults to that of `s` but can be overridden.
emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}()
copy(s::IdSet) = typeof(s)(s)

# IdDict-backed set operations: each one forwards to the wrapped `s.dict`,
# whose values are all `nothing`.
isempty(s::IdSet) = isempty(s.dict)
length(s::IdSet) = length(s.dict)
in(@nospecialize(x), s::IdSet) = haskey(s.dict, x)
push!(s::IdSet, @nospecialize(x)) = (s.dict[x] = nothing; s)
# returns the removed key itself rather than the dict value
pop!(s::IdSet, @nospecialize(x)) = (pop!(s.dict, x); x)
# three-argument form returns `default` instead of erroring when `x` is absent
pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = (x in s ? pop!(s, x) : default)
delete!(s::IdSet, @nospecialize(x)) = (delete!(s.dict, x); s)
# Membership test: the runtime lookup returns the key's slot index, or -1 when absent.
haskey(s::IdSet, @nospecialize(key)) = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, key) != -1
# Size queries read the cached element count rather than scanning `list`.
isempty(s::IdSet) = s.count == 0
length(s::IdSet) = s.count
in(@nospecialize(x), s::IdSet) = haskey(s, x)
# Add `x` to `s` and return `s`. If a matching key is already stored, its slot is
# overwritten with `x` and the element count does not change.
function push!(s::IdSet, @nospecialize(x))
# 0-based slot of an existing matching key, or -1 when `x` is absent
idx = ccall(:jl_idset_peek_bp, Int, (Any, Any, Any), s.list, s.idxs, x)
if idx >= 0
# already present: refresh the stored key in place
s.list[idx + 1] = x
else
if s.max < length(s.list)
# there is spare room after the last used slot: append without reallocating
idx = s.max
@assert !isassigned(s.list, idx + 1)
s.list[idx + 1] = x
s.max = idx + 1
else
# no tail room: let the runtime store the key; it may compact and/or
# reallocate the key memory, so `list` is replaced by whatever it returns
newidx = RefValue{Int}(0)
setfield!(s, :list, ccall(:jl_idset_put_key, Any, (Any, Any, Ptr{Int}), s.list, x, newidx))
idx = newidx[]
# idx >= 0 is the 0-based slot; idx < 0 encodes -(slot) - 1 and signals that
# existing keys moved. Either way this yields the 1-based position of `x`.
s.max = idx < 0 ? -idx : idx + 1
end
@assert s.list[s.max] === x
# register the new slot in the hash index (a negative idx requests a full rebuild);
# the index memory is replaced by whatever the runtime returns
setfield!(s, :idxs, ccall(:jl_idset_put_idx, Any, (Any, Any, Int), s.list, s.idxs, idx))
s.count += 1
end
s
end
# Remove `x` from `s` when present. Returns whatever `jl_idset_pop` reports:
# the slot index of the removed key, or -1 if `x` was not a member (in which
# case `s` is left untouched).
function _pop!(s::IdSet, @nospecialize(x))
    slot = ccall(:jl_idset_pop, Int, (Any, Any, Any), s.list, s.idxs, x)
    slot == -1 && return slot
    s.count -= 1
    # Walk `max` back over any trailing slots that are no longer assigned.
    top = s.max
    while top > 0 && !isassigned(s.list, top)
        top -= 1
    end
    s.max = top
    return slot
end
# Removal front-ends over `_pop!`, which returns -1 when `x` was not in the set.
# Two-argument `pop!` throws KeyError for a missing key and otherwise returns `x`.
pop!(s::IdSet, @nospecialize(x)) = _pop!(s, x) == -1 ? throw(KeyError(x)) : x
# Three-argument `pop!` returns `default` for a missing key instead of throwing.
pop!(s::IdSet, @nospecialize(x), @nospecialize(default)) = _pop!(s, x) == -1 ? default : x
# `delete!` ignores whether the key was present and always returns the set.
delete!(s::IdSet, @nospecialize(x)) = (_pop!(s, x); s)

# IdDict-backed variants: forward to the wrapped dict and return the set itself.
sizehint!(s::IdSet, newsz) = (sizehint!(s.dict, newsz); s)
empty!(s::IdSet) = (empty!(s.dict); s)
# Capacity hint for `s`. This is currently a no-op: the key list is neither
# grown nor compacted and the index is not rehashed, so `newsz` is ignored.
# Returns `s`, as `sizehint!` is documented to do for every collection, so the
# call can be chained or used as the value of an expression.
function sizehint!(s::IdSet, newsz)
    # TODO: grow/compact list and perform rehash, if profitable?
    # TODO: shrink?
    # s.list = resize(s.list, newsz)
    # newsz = _tablesz(newsz)
    # oldsz = length(s.idxs)
    # #grow at least 25%
    # if newsz < (oldsz*5)>>2
    #     return s
    # end
    # rehash!(s, newsz)
    return s
end

# Remove every element from `s` and return `s`. The index is zero-filled first,
# then each slot in the used prefix 1:max of the key list is unset; the backing
# memories themselves are kept.
function empty!(s::IdSet)
    fill!(s.idxs, 0x00)
    slots = s.list
    last = s.max
    i = 1
    while i <= last
        _unsetindex!(slots, i)
        i += 1
    end
    s.max = 0
    s.count = 0
    return s
end

# In-place filtering is delegated to `unsafe_filter!`.
filter!(f, d::IdSet) = unsafe_filter!(f, d)

function iterate(s::IdSet, state...)
y = iterate(s.dict, state...)
y === nothing && return nothing
((k, _), i) = y
return (k, i)
# Iteration protocol for IdSet. `state` is the 1-based slot index of the element
# returned last (0 before the first call). Scans forward over the key list,
# skipping unassigned slots, and stops once the slot index exceeds `s.max`.
function iterate(s::IdSet{S}, state=0) where {S}
    slot = state + 1
    while slot <= s.max
        if isassigned(s.list, slot)
            return (s.list[slot]::S, slot)
        end
        slot += 1
    end
    return nothing
end
2 changes: 1 addition & 1 deletion src/Makefile
Expand Up @@ -306,7 +306,7 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h
$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
$(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/idset.c $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
intrinsics.cpp jitlayers.h intrinsics.h llvm-codegen-shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h)
$(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
Expand Down
1 change: 1 addition & 0 deletions src/builtins.c
Expand Up @@ -490,6 +490,7 @@ JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT
// eq hash table --------------------------------------------------------------

#include "iddict.c"
#include "idset.c"

// object model and type primitives -------------------------------------------

Expand Down
3 changes: 2 additions & 1 deletion src/gc.c
Expand Up @@ -3096,7 +3096,8 @@ static void gc_mark_roots(jl_gc_markqueue_t *mq)
// constants
gc_try_claim_and_push(mq, jl_emptytuple_type, NULL);
gc_try_claim_and_push(mq, cmpswap_names, NULL);
gc_try_claim_and_push(mq, jl_global_roots_table, NULL);
gc_try_claim_and_push(mq, jl_global_roots_list, NULL);
gc_try_claim_and_push(mq, jl_global_roots_keyset, NULL);
}

// find unmarked objects that need to be finalized from the finalizer list "list".
Expand Down
12 changes: 6 additions & 6 deletions src/gf.c
Expand Up @@ -110,7 +110,7 @@ static int8_t jl_cachearg_offset(jl_methtable_t *mt)
/// ----- Insertion logic for special entries ----- ///


static uint_t speccache_hash(size_t idx, jl_svec_t *data)
static uint_t speccache_hash(size_t idx, jl_value_t *data)
{
jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx);
jl_value_t *sig = ml->specTypes;
Expand All @@ -119,7 +119,7 @@ static uint_t speccache_hash(size_t idx, jl_svec_t *data)
return ((jl_datatype_t*)sig)->hash;
}

static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv)
static int speccache_eq(size_t idx, const void *ty, jl_value_t *data, uint_t hv)
{
jl_method_instance_t *ml = (jl_method_instance_t*)jl_svecref(data, idx);
jl_value_t *sig = ml->specTypes;
Expand All @@ -139,7 +139,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
jl_value_t *ut = jl_is_unionall(type) ? jl_unwrap_unionall(type) : type;
JL_TYPECHK(specializations, datatype, ut);
uint_t hv = ((jl_datatype_t*)ut)->hash;
jl_array_t *speckeyset = NULL;
jl_genericmemory_t *speckeyset = NULL;
jl_value_t *specializations = NULL;
size_t i = -1, cl = 0, lastcl;
for (int locked = 0; locked < 2; locked++) {
Expand All @@ -164,7 +164,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
}
cl = jl_svec_len(specializations);
if (hv) {
ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, (jl_svec_t*)specializations, hv);
ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv, 0);
if (idx != -1) {
jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, idx);
if (locked)
Expand Down Expand Up @@ -210,7 +210,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
jl_atomic_store_release(&m->specializations, specializations);
jl_gc_wb(m, specializations);
if (hv)
jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, (jl_svec_t*)specializations);
jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, 0, specializations);
}
if (hv) {
_Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
Expand Down Expand Up @@ -242,7 +242,7 @@ static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PRO
assert(jl_svecref(specializations, i) == jl_nothing);
jl_svecset(specializations, i, mi);
if (hv)
jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, (jl_svec_t*)specializations);
jl_smallintset_insert(&m->speckeyset, (jl_value_t*)m, speccache_hash, i, specializations);
JL_GC_POP();
}
JL_UNLOCK(&m->writelock); // may gc
Expand Down
1 change: 1 addition & 0 deletions src/iddict.c
Expand Up @@ -194,3 +194,4 @@ size_t jl_eqtable_nextind(jl_genericmemory_t *t, size_t i)

#undef hash_size
#undef max_probe
#undef h2index
118 changes: 118 additions & 0 deletions src/idset.c
@@ -0,0 +1,118 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license


// Hash callback for the index: hashes the key stored at slot `idx` of the key
// memory `data` by its object id. An empty slot hashes to 0.
static uint_t idset_hash(size_t idx, jl_value_t *data)
{
    jl_value_t *elt = jl_genericmemory_ptr_ref(data, idx);
    if (elt == NULL) {
        // not expected to happen, unless there was concurrent corruption
        return 0;
    }
    return jl_object_id(elt);
}

// Equality callback for the index: compares the key stored at slot `idx` of the
// key memory `data` against `y` with jl_egal. An empty slot never matches.
// `hv` is part of the callback signature and is not consulted here.
static int idset_eq(size_t idx, const void *y, jl_value_t *data, uint_t hv)
{
    jl_value_t *elt = jl_genericmemory_ptr_ref(data, idx);
    if (elt == NULL) {
        // not expected to happen, unless there was concurrent corruption
        return 0;
    }
    return jl_egal(elt, (jl_value_t*)y);
}

// Rehash the index `idxs` over the key memory `keys`.
// A `newsz` of 0 is a no-op that returns `idxs` unchanged; otherwise `newsz` is
// rounded up to a power of two and handed to smallintset_rehash, whose result
// is returned.
// NOTE(review): presumably the returned memory replaces `idxs` and the caller
// must store it — confirm against smallintset_rehash.
jl_genericmemory_t *jl_idset_rehash(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, size_t newsz)
{
if (newsz == 0)
return idxs;
newsz = next_power_of_two(newsz);
//if (idxs->length == newsz)
// jl_idset_put_idx(keys, idxs, -newsz+1);
//else
return smallintset_rehash(idxs, idset_hash, (jl_value_t*)keys, newsz, 0);
}

// Return idx if key is in hash, otherwise -1
// Note: lookup in the IdSet is permitted concurrently, if you avoid deletions,
// and assuming you do use an external lock around all insertions
ssize_t jl_idset_peek_bp(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT
{
    // hash by object id, then let the index resolve candidates via idset_eq
    return jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, jl_object_id(key), 0);
}

// Return the stored key that matches `key`, or NULL when there is none.
jl_value_t *jl_idset_get(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT
{
    ssize_t slot = jl_idset_peek_bp(keys, idxs, key);
    return slot == -1 ? NULL : jl_genericmemory_ptr_ref(keys, slot);
}


// Slide all non-NULL keys to the front of `keys`, preserving their order, so the
// index can be rebuilt over a dense prefix.
// Returns the number of keys kept; the value is negated when at least one key
// changed slot, which tells the caller the index must be rehashed.
static ssize_t idset_compact(jl_genericmemory_t *keys)
{
    ssize_t nkept = 0; // next destination slot == number of keys seen so far
    int moved = 0;
    for (ssize_t src = 0; src < keys->length; src++) {
        jl_value_t *k = jl_genericmemory_ptr_ref(keys, src);
        if (k == NULL)
            continue;
        if (src != nkept) {
            moved = 1;
            jl_genericmemory_ptr_set(keys, nkept, k);
            jl_genericmemory_ptr_set(keys, src, NULL);
        }
        nkept++;
    }
    return moved ? -nkept : nkept;
}

// Store `key` in the key memory and return the memory that now holds it: either
// `keys` itself or a newly allocated, larger replacement.
// `*newidx` tells the caller where the key went:
//   >= 0 : the 0-based slot; previously stored keys kept their slots
//   <  0 : -(slot) - 1; compaction moved existing keys, so the whole index
//          must be rebuilt
// This function does not touch the hash index; the caller does that separately.
jl_genericmemory_t *jl_idset_put_key(jl_genericmemory_t *keys, jl_value_t *key, ssize_t *newidx)
{
ssize_t l = keys->length;
ssize_t i = l;
// skip back over trailing empty slots to find the first free one at the tail
while (i > 0 && jl_genericmemory_ptr_ref(keys, i - 1) == NULL)
i--;
// i points to the place to insert
*newidx = i;
if (i == l) {
// no free slot at the tail: squeeze out interior holes first
i = idset_compact(keys);
if (i < 0) {
// keys moved: report the insertion slot in the negative encoding
*newidx = i - 1;
i = -i;
}
if (i >= l / 3 * 2) {
size_t nl = l < 4 ? 4 : (l * 3) >> 1; // grow space by 50% if less than 33% free after compacting
jl_genericmemory_t *nk = jl_alloc_genericmemory(jl_memory_any_type, nl);
// carry the dense prefix of live keys over to the new memory
if (i > 0)
memcpy(nk->ptr, keys->ptr, sizeof(void*) * i);
keys = nk;
}
}
assert(jl_genericmemory_ptr_ref(keys, i) == NULL);
jl_genericmemory_ptr_set(keys, i, key);
return keys;
}

// Register slot `idx` of `keys` in the hash index and return the index memory
// to use from now on.
//   idx >= 0 : insert just that slot
//   idx <  0 : full rebuild; the index is emptied and every non-NULL slot in
//              0 .. -idx-1 is reinserted
// NOTE(review): jl_smallintset_insert is handed &newidxs, so it presumably may
// swap in a different memory; that is why the final value of newidxs is
// returned rather than `idxs` — confirm against the smallintset code.
jl_genericmemory_t *jl_idset_put_idx(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, ssize_t idx)
{
_Atomic(jl_genericmemory_t*) newidxs = idxs;
// keep the (possibly replaced) index rooted across the insertions
JL_GC_PUSH1(&newidxs);
if (idx < 0) { // full rehash
smallintset_empty(idxs);
for (ssize_t i = 0; i < -idx; i++)
if (jl_genericmemory_ptr_ref(keys, i) != NULL)
jl_smallintset_insert(&newidxs, NULL, idset_hash, i, (jl_value_t*)keys);
}
else {
jl_smallintset_insert(&newidxs, NULL, idset_hash, idx, (jl_value_t*)keys);
}
JL_GC_POP();
return jl_atomic_load_relaxed(&newidxs);
}

/* returns idx if key is in hash, otherwise -1 */
ssize_t jl_idset_pop(jl_genericmemory_t *keys, jl_genericmemory_t *idxs, jl_value_t *key) JL_NOTSAFEPOINT
{
    // NOTE(review): the trailing 1 (peek passes 0) presumably asks the lookup to
    // also drop the entry from the index — confirm in jl_smallintset_lookup
    ssize_t slot = jl_smallintset_lookup(idxs, idset_eq, key, (jl_value_t*)keys, jl_object_id(key), 1);
    if (slot == -1)
        return -1;
    // clear the key's slot so it reads as empty from now on
    jl_genericmemory_ptr_set(keys, slot, NULL);
    return slot;
}
3 changes: 2 additions & 1 deletion src/init.c
Expand Up @@ -861,7 +861,8 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
jl_restore_system_image(jl_options.image_file);
} else {
jl_init_types();
jl_global_roots_table = jl_alloc_memory_any(0);
jl_global_roots_list = (jl_genericmemory_t*)jl_an_empty_memory_any;
jl_global_roots_keyset = (jl_genericmemory_t*)jl_an_empty_memory_any;
}

jl_init_flisp();
Expand Down
3 changes: 2 additions & 1 deletion src/jl_exported_data.inc
Expand Up @@ -73,12 +73,13 @@
XX(jl_genericmemory_type) \
XX(jl_genericmemory_typename) \
XX(jl_memory_uint8_type) \
XX(jl_memory_uint16_type) \
XX(jl_memory_uint32_type) \
XX(jl_memory_uint64_type) \
XX(jl_memoryref_any_type) \
XX(jl_genericmemoryref_type) \
XX(jl_genericmemoryref_typename) \
XX(jl_memoryref_uint8_type) \
XX(jl_memoryref_uint64_type) \
XX(jl_methoderror_type) \
XX(jl_method_instance_type) \
XX(jl_method_match_type) \
Expand Down
7 changes: 5 additions & 2 deletions src/jltypes.c
Expand Up @@ -2977,6 +2977,9 @@ void jl_init_types(void) JL_GC_DISABLED

jl_memory_any_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem);
jl_memory_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem);
jl_memory_uint16_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint16_type, cpumem);
jl_memory_uint32_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint32_type, cpumem);
jl_memory_uint64_type = jl_apply_type3((jl_value_t*)jl_genericmemory_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint64_type, cpumem);
jl_memoryref_any_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_any_type, cpumem);
jl_memoryref_uint8_type = jl_apply_type3((jl_value_t*)jl_genericmemoryref_type, (jl_value_t*)jl_not_atomic_sym, (jl_value_t*)jl_uint8_type, cpumem);

Expand Down Expand Up @@ -3004,7 +3007,7 @@ void jl_init_types(void) JL_GC_DISABLED

// finish initializing module Core
core = jl_core_module;
jl_atomic_store_relaxed(&core->bindingkeyset, (jl_array_t*)jl_an_empty_vec_any);
jl_atomic_store_relaxed(&core->bindingkeyset, (jl_genericmemory_t*)jl_an_empty_memory_any);
// export own name, so "using Foo" makes "Foo" itself visible
jl_set_const(core, core->name, (jl_value_t*)core);
jl_module_public(core, core->name, 1);
Expand Down Expand Up @@ -3180,7 +3183,7 @@ void jl_init_types(void) JL_GC_DISABLED
jl_ulong_type,
jl_type_type,
jl_any_type, // union(jl_simplevector_type, jl_method_instance_type),
jl_array_type,
jl_genericmemory_type, // union(jl_memory_uint8_type, jl_memory_uint16_type, jl_memory_uint32_type, jl_memory_uint64_type, jl_memory_any_type)
jl_string_type,
jl_any_type,
jl_any_type,
Expand Down
7 changes: 5 additions & 2 deletions src/julia.h
Expand Up @@ -320,7 +320,7 @@ typedef struct _jl_method_t {

// table of all jl_method_instance_t specializations we have
_Atomic(jl_value_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....], or a single item
_Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations
_Atomic(jl_genericmemory_t*) speckeyset; // index lookup by hash into specializations

jl_value_t *slot_syms; // compacted list of slot names (String)
jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table
Expand Down Expand Up @@ -611,7 +611,7 @@ typedef struct _jl_module_t {
jl_sym_t *name;
struct _jl_module_t *parent;
_Atomic(jl_svec_t*) bindings;
_Atomic(jl_array_t*) bindingkeyset; // index lookup by name into bindings
_Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings
// hidden fields:
arraylist_t usings; // modules with all bindings potentially imported
jl_uuid_t build_id;
Expand Down Expand Up @@ -881,6 +881,9 @@ extern JL_DLLIMPORT jl_value_t *jl_array_int32_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_array_uint32_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_array_uint64_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memory_uint8_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memory_uint16_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memory_uint32_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memory_uint64_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memory_any_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memoryref_uint8_type JL_GLOBALLY_ROOTED;
extern JL_DLLIMPORT jl_value_t *jl_memoryref_any_type JL_GLOBALLY_ROOTED;
Expand Down