Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Stack allocate some genericmemory #52382

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -507,10 +507,10 @@ const undef = UndefInitializer()

# type and dimensionality specified
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} =
if isdefined(self, :instance) && m === 0
self.instance
else
if (kind === :not_atomic && addrspace === CPU) || (!isdefined(self, :instance) || m !== 0)
ccall(:jl_alloc_genericmemory, Ref{GenericMemory{kind,T,addrspace}}, (Any, Int), self, m)
else
self.instance
end
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1))
# empty vector constructor
Expand Down
21 changes: 18 additions & 3 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1783,8 +1783,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
assert(!isVa && !llvmcall && nccallargs == 2);
const jl_cgval_t &typ = argv[0];
const jl_cgval_t &nel = argv[1];
auto istyp = argv[0].constant;
auto arg_typename = [&] JL_NOTSAFEPOINT {
auto istyp = argv[0].constant;
std::string type_str;
if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)){
auto eltype = jl_tparam1(istyp);
Expand All @@ -1798,8 +1798,23 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
else
type_str = "<unknown type>";
return "Memory{" + type_str + "}[]";
};
auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory), { boxed(ctx,typ), emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type)});
};
auto elsize = emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type);
jl_genericmemory_info_t info;
if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)) {
info = jl_get_genericmemory_info(istyp);
} else {
info = {0, 0, 0, 0};
}
auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory),
{
boxed(ctx,typ),
elsize,
static_cast<Value*>(ConstantInt::get(ctx.types().T_size, info.elsize)),
static_cast<Value*>(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), info.isunion)),
static_cast<Value*>(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), info.zeroinit)),
static_cast<Value*>(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), info.isboxed)),
});
setName(ctx.emission_context, alloc, arg_typename);
JL_GC_POP();
return mark_julia_type(ctx, alloc, true, jl_any_type);
Expand Down
34 changes: 13 additions & 21 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,16 +603,6 @@ static inline void add_named_global(StringRef name, T *addr)
add_named_global(name, (void*)(uintptr_t)addr);
}

// Build an AttributeSet from a list of attribute kinds plus optional
// already-constructed attributes.
//
// C         - LLVM context used to materialize each attribute kind.
// attrkinds - enum attribute kinds; each is converted with Attribute::get.
// extra     - attributes appended after the kinds, in the order given
//             (defaults to none).
//
// Returns the AttributeSet obtained from the combined list.
AttributeSet Attributes(LLVMContext &C, std::initializer_list<Attribute::AttrKind> attrkinds, std::initializer_list<Attribute> extra={})
{
    SmallVector<Attribute, 8> combined;
    combined.reserve(attrkinds.size() + extra.size());
    // kinds first ...
    for (Attribute::AttrKind kind : attrkinds)
        combined.push_back(Attribute::get(C, kind));
    // ... followed by the caller-supplied attributes
    combined.append(extra.begin(), extra.end());
    return AttributeSet::get(C, ArrayRef<Attribute>(combined));
}

static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); }

static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); }
Expand Down Expand Up @@ -1303,12 +1293,20 @@ static const auto sync_gc_total_bytes_func = new JuliaFunction<>{
nullptr,
};
static const auto jl_allocgenericmemory = new JuliaFunction<TypeFnContextAndSizeT>{
XSTR(jl_alloc_genericmemory),
"julia.gc_alloc_genericmemory",
[](LLVMContext &C, Type *T_Size) {
auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
return FunctionType::get(T_prjlvalue, // new Memory
{T_prjlvalue, // type
T_Size // nelements
T_Size, // nelements
// these fields are for alloc-opt, because
// when compiling for images we need to know these
// to stack allocate arrays
// if it's dynamic, we just set everything to 0
T_Size, // elsize
getInt8Ty(C), // isunion
getInt8Ty(C), // zeroinit
getInt8Ty(C), // boxed
}, false); },
[](LLVMContext &C) {
AttrBuilder FnAttrs(C);
Expand Down Expand Up @@ -1420,14 +1418,8 @@ static const auto gc_loaded_func = new JuliaFunction<>{
// top:
// %metadata GC base pointer is ptr(Tracked)
// ret addrspacecast ptr to ptr(Loaded)
[](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_prjlvalue_ty(C), AddressSpace::Loaded),
{JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_prjlvalue_ty(C), 0)}, false); },
[](LLVMContext &C) {
AttributeSet FnAttrs = Attributes(C, {Attribute::ReadNone, Attribute::NoSync, Attribute::NoUnwind, Attribute::Speculatable, Attribute::WillReturn, Attribute::NoRecurse});
AttributeSet RetAttrs = Attributes(C, {Attribute::NonNull, Attribute::NoUndef});
return AttributeList::get(C, FnAttrs, RetAttrs,
{ Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone, Attribute::NoCapture}),
Attributes(C, {Attribute::NonNull, Attribute::NoUndef, Attribute::ReadNone}) }); },
[](LLVMContext &C) { return get_gc_loaded_decl(C).first; },
[](LLVMContext &C) { return get_gc_loaded_decl(C).second; },
};

// julia.call represents a call with julia calling convention, it is used as
Expand Down Expand Up @@ -9432,7 +9424,7 @@ static void init_jit_functions(void)
add_named_global(jlfieldindex_func, &jl_field_index);
add_named_global(diff_gc_total_bytes_func, &jl_gc_diff_total_bytes);
add_named_global(sync_gc_total_bytes_func, &jl_gc_sync_total_bytes);
add_named_global(jl_allocgenericmemory, &jl_alloc_genericmemory);
add_named_global(jl_allocgenericmemory, (void*)NULL);
add_named_global(gcroot_flush_func, (void*)NULL);
add_named_global(gc_preserve_begin_func, (void*)NULL);
add_named_global(gc_preserve_end_func, (void*)NULL);
Expand Down
60 changes: 38 additions & 22 deletions src/genericmemory.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,40 @@ typedef uint64_t wideint_t;

#define MAXINTVAL (((size_t)-1)>>1)

jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz)
// used by alloc-opt
//
// Compute the number of data bytes needed for `nel` elements described by
// `info`: nel * info->elsize, plus one extra byte per element when
// info->isunion is set.
//
// Returns MAXINTVAL as a sentinel when `nel` or the computed byte count is
// not strictly below MAXINTVAL; otherwise returns the byte count.
JL_DLLEXPORT size_t jl_genericmemory_bytesize(const jl_genericmemory_info_t *info, size_t nel)
{
    // do the arithmetic in the wider wideint_t type
    wideint_t nbytes = (wideint_t)nel * info->elsize;
    // an extra byte for each isbits union memory element, stored at m->ptr + m->length
    if (info->isunion)
        nbytes += nel;
    int fits = nel < MAXINTVAL && nbytes < (wideint_t)MAXINTVAL;
    return fits ? (size_t)nbytes : MAXINTVAL;
}

// used by codegen to give info to alloc-opt
//
// Summarize the element layout of the GenericMemory datatype `mtype`
// (asserted to be a datatype) into a jl_genericmemory_info_t:
//   isboxed / isunion - copied from the datatype's layout flags
//   zeroinit          - copied from the datatype itself
//   elsize            - sizeof(void*) for boxed elements, else the layout size
JL_DLLEXPORT jl_genericmemory_info_t jl_get_genericmemory_info(jl_value_t *mtype)
{
    assert(jl_is_datatype(mtype));
    jl_datatype_t *dt = (jl_datatype_t*)mtype;
    jl_genericmemory_info_t info;
    info.zeroinit = dt->zeroinit;
    info.isunion = dt->layout->flags.arrayelem_isunion;
    info.isboxed = dt->layout->flags.arrayelem_isboxed;
    // boxed elements are stored as pointers, whatever the layout size says
    if (info.isboxed)
        info.elsize = sizeof(void*);
    else
        info.elsize = dt->layout->size;
    return info;
}

jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, const jl_genericmemory_info_t *info)
{
jl_task_t *ct = jl_current_task;
char *data;
jl_genericmemory_t *m;
if (nel == 0) // zero-sized allocation optimization
return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance;
wideint_t prod = (wideint_t)nel * elsz;
if (isunion) {
// an extra byte for each isbits union memory element, stored at m->ptr + m->length
prod += nel;
}
if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL)
size_t prod = jl_genericmemory_bytesize(info, nel);
if (prod == MAXINTVAL)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size");
size_t tot = (size_t)prod + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT);

Expand All @@ -89,7 +110,7 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is
m->length = nel;
m->ptr = data;

if (zeroinit)
if (info->zeroinit)
memset(data, 0, (size_t)prod);
return m;
}
Expand All @@ -114,13 +135,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_
if (nel == 0) // zero-sized allocation optimization fast path
return m;

size_t elsz = layout->size;
int isboxed = layout->flags.arrayelem_isboxed;
int isunion = layout->flags.arrayelem_isunion;
int zi = ((jl_datatype_t*)mtype)->zeroinit;
if (isboxed)
elsz = sizeof(void*);
return _new_genericmemory_(mtype, nel, isunion, zi, elsz);
jl_genericmemory_info_t info = jl_get_genericmemory_info(mtype);
return _new_genericmemory_(mtype, nel, &info);
}

JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str)
Expand Down Expand Up @@ -447,18 +463,18 @@ JL_DLLEXPORT jl_genericmemory_t *jl_genericmemory_copy_slice(jl_genericmemory_t
{
jl_value_t *mtype = (jl_value_t*)jl_typetagof(mem);
const jl_datatype_layout_t *layout = ((jl_datatype_t*)mtype)->layout;
size_t elsz = layout->size;
int isunion = layout->flags.arrayelem_isunion;
jl_genericmemory_t *new_mem = _new_genericmemory_(mtype, len, isunion, 0, elsz);
if (isunion) {
memcpy(new_mem->ptr, (char*)mem->ptr + (size_t)data * elsz, len * elsz);
jl_genericmemory_info_t info = jl_get_genericmemory_info(mtype);
info.zeroinit = 0;
jl_genericmemory_t *new_mem = _new_genericmemory_(mtype, len, &info);
if (info.isunion) {
memcpy(new_mem->ptr, (char*)mem->ptr + (size_t)data * info.elsize, len * info.elsize);
memcpy(jl_genericmemory_typetagdata(new_mem), jl_genericmemory_typetagdata(mem) + (size_t)data, len);
}
else if (layout->first_ptr != -1) {
memmove_refs((_Atomic(void*)*)new_mem->ptr, (_Atomic(void*)*)data, len * elsz / sizeof(void*));
memmove_refs((_Atomic(void*)*)new_mem->ptr, (_Atomic(void*)*)data, len * info.elsize / sizeof(void*));
}
else if (data != NULL) {
memcpy(new_mem->ptr, data, len * elsz);
memcpy(new_mem->ptr, data, len * info.elsize);
}
return new_mem;
}
Expand Down
12 changes: 12 additions & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,13 @@ typedef union {
uint8_t packed;
} jl_code_info_flags_t;

// Element-layout summary of a GenericMemory type. It is filled in by
// jl_get_genericmemory_info and read by jl_genericmemory_bytesize (both
// declared later in this header).
// NOTE(review): codegen appears to brace-initialize this positionally
// (`{0, 0, 0, 0}`), so the field order should be kept stable -- confirm before reordering.
typedef struct {
// size in bytes of one element; sizeof(void*) when the elements are boxed
size_t elsize;
// nonzero when elements are isbits unions (one extra byte per element is added to the byte size)
uint8_t isunion;
// nonzero when newly allocated data is to be zero-filled
uint8_t zeroinit;
// nonzero when elements are stored boxed (as pointers)
uint8_t isboxed;
} jl_genericmemory_info_t;

// -- functions -- //

JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
Expand Down Expand Up @@ -988,6 +995,11 @@ size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;

uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;

// used by alloc-opt
JL_DLLEXPORT size_t jl_genericmemory_bytesize(const jl_genericmemory_info_t *info, size_t nel);
// used by codegen to give info to alloc-opt
JL_DLLEXPORT jl_genericmemory_info_t jl_get_genericmemory_info(jl_value_t *mtype);

// the first argument to jl_idtable_rehash is used to return a value
// make sure it is rooted if it is used after the function returns
JL_DLLEXPORT jl_genericmemory_t *jl_idtable_rehash(jl_genericmemory_t *a, size_t newsz);
Expand Down