Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jitlayers: replace sharedbytes intern pool with one that respects alignment #52182

Merged
merged 1 commit into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -4065,8 +4065,7 @@ static void *gc_perm_alloc_large(size_t sz, int zero, unsigned align, unsigned o
errno = last_errno;
jl_may_leak(base);
assert(align > 0);
unsigned diff = (offset - (uintptr_t)base) % align;
return (void*)((char*)base + diff);
return (void*)(LLT_ALIGN((uintptr_t)base + offset, (uintptr_t)align) - offset);
}

STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned offset) JL_NOTSAFEPOINT
Expand Down
21 changes: 15 additions & 6 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1410,10 +1410,12 @@ namespace {

struct JITPointersT {

JITPointersT(orc::ExecutionSession &ES) JL_NOTSAFEPOINT : ES(ES) {}
JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT
: SharedBytes(SharedBytes), Lock(Lock) {}

Expected<orc::ThreadSafeModule> operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
std::lock_guard<std::mutex> locked(Lock);
for (auto &GV : make_early_inc_range(M.globals())) {
if (auto *Shared = getSharedBytes(GV)) {
++InternedGlobals;
Expand All @@ -1429,10 +1431,11 @@ namespace {
return std::move(TSM);
}

private:
// optimize memory by turning long strings into memoized copies, instead of
// making a copy per object file of output.
// we memoize them using the ExecutionSession's string pool;
// this makes it unsafe to call clearDeadEntries() on the pool.
// we memoize them using a StringSet with a custom-alignment allocator
// to ensure they are properly aligned
Constant *getSharedBytes(GlobalVariable &GV) JL_NOTSAFEPOINT {
// We could probably technically get away with
// interning even external linkage globals,
Expand All @@ -1458,11 +1461,17 @@ namespace {
// Cutoff, since we don't want to intern small strings
return nullptr;
}
auto Interned = *ES.intern(Data);
Align Required = GV.getAlign().valueOrOne();
Align Preferred = MaxAlignedAlloc::alignment(Data.size());
if (Required > Preferred)
return nullptr;
StringRef Interned = SharedBytes.insert(Data).first->getKey();
assert(llvm::isAddrAligned(Preferred, Interned.data()));
return literal_static_pointer_val(Interned.data(), GV.getType());
}

orc::ExecutionSession &ES;
SharedBytesT &SharedBytes;
std::mutex &Lock;
};
}

Expand Down Expand Up @@ -1696,7 +1705,7 @@ JuliaOJIT::JuliaOJIT()
#endif
LockLayer(ObjectLayer),
CompileLayer(ES, LockLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(ES))),
JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))),
OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT<N_optlevels>(*TM, PrintLLVMTimers))),
OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)),
DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)),
Expand Down
43 changes: 43 additions & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

#include <llvm/ADT/MapVector.h>
#include <llvm/ADT/StringSet.h>
#include <llvm/Support/AllocatorBase.h>

#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Constants.h>
Expand Down Expand Up @@ -292,6 +294,44 @@ static const inline char *name_from_method_instance(jl_method_instance_t *li) JL
return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope";
}

template <size_t offset = 0>
class MaxAlignedAllocImpl
: public AllocatorBase<MaxAlignedAllocImpl<offset>> {

public:
MaxAlignedAllocImpl() JL_NOTSAFEPOINT = default;

static Align alignment(size_t Size) JL_NOTSAFEPOINT {
// Define the maximum alignment we expect to require, from offset bytes off
// the returned pointer, this is >= alignof(std::max_align_t), which is too
// small often to actually use.
const size_t MaxAlignment = JL_CACHE_BYTE_ALIGNMENT;
return Align(std::min((size_t)llvm::PowerOf2Ceil(Size), MaxAlignment));
}

LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) {
Align MaxAlign = alignment(Size);
assert(Alignment < MaxAlign); (void)Alignment;
return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset);
}

inline LLVM_ATTRIBUTE_RETURNS_NONNULL
vtjnash marked this conversation as resolved.
Show resolved Hide resolved
void * Allocate(size_t Size, size_t Alignment) {
return Allocate(Size, Align(Alignment));
}

// Pull in base class overloads.
using AllocatorBase<MaxAlignedAllocImpl>::Allocate;

void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { abort(); }

// Pull in base class overloads.
using AllocatorBase<MaxAlignedAllocImpl>::Deallocate;

private:
};
using MaxAlignedAlloc = MaxAlignedAllocImpl<>;

typedef JITSymbol JL_JITSymbol;
// The type that is similar to SymbolInfo on LLVM 4.0 is actually
// `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol
Expand All @@ -300,6 +340,7 @@ typedef JITSymbol JL_SymbolInfo;

using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
using OptimizerResultT = Expected<orc::ThreadSafeModule>;
using SharedBytesT = StringSet<MaxAlignedAllocImpl<sizeof(StringSet<>::MapEntryTy)>>;

class JuliaOJIT {
public:
Expand Down Expand Up @@ -516,6 +557,7 @@ class JuliaOJIT {

// Note that this is a safepoint due to jl_get_library_ and jl_dlsym calls
void optimizeDLSyms(Module &M);

private:

const std::unique_ptr<TargetMachine> TM;
Expand All @@ -529,6 +571,7 @@ class JuliaOJIT {
std::mutex RLST_mutex{};
int RLST_inc = 0;
DenseMap<void*, std::string> ReverseLocalSymbolTable;
SharedBytesT SharedBytes;

std::unique_ptr<DLSymOptimizer> DLSymOpt;

Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
extern uv_mutex_t gc_perm_lock;
void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
unsigned align, unsigned offset) JL_NOTSAFEPOINT;
void *jl_gc_perm_alloc(size_t sz, int zero,
JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero,
unsigned align, unsigned offset) JL_NOTSAFEPOINT;
void gc_sweep_sysimg(void);

Expand Down