diff --git a/src/codegen.cpp b/src/codegen.cpp
index 88c27366fa7d6..c2a042967c97a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4236,7 +4236,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
             jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
             jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
             if (ci != jl_nothing) {
-                auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
                 // check if we know how to handle this specptr
                 if (invoke == jl_fptr_const_return_addr) {
                     result = mark_julia_const(ctx, codeinst->rettype_const);
@@ -4262,10 +4262,13 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
                         // optimization: emit the correct name immediately, if we know it
                         // TODO: use `emitted` map here too to try to consolidate names?
                         // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
-                        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
                         auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                         if (fptr) {
-                            if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) {
+                            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                                jl_cpu_pause();
+                            }
+                            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                            if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) {
                                 protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                                 need_to_emit = false;
                             }
@@ -5783,9 +5786,15 @@ static Function* gen_cfun_wrapper(
     if (lam && params.cache) {
         // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
         codeinst = jl_compile_method_internal(lam, world);
-        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
         auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
         assert(invoke);
+        if (fptr) {
+            while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                jl_cpu_pause();
+            }
+            invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        }
         // WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
         if (invoke == jl_fptr_args_addr) {
             callptr = fptr;
@@ -5796,7 +5805,7 @@ static Function* gen_cfun_wrapper(
             callptr = (void*)codeinst->rettype_const;
             calltype = 2;
         }
-        else if (codeinst->isspecsig) {
+        else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
             callptr = fptr;
             calltype = 3;
         }
@@ -8526,7 +8535,7 @@ void jl_compile_workqueue(
                 "invalid world for code-instance");
             StringRef preal_decl = "";
             bool preal_specsig = false;
-            auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+            auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
             bool cache_valid = params.cache;
             if (params.external_linkage) {
                 cache_valid = 0 && jl_object_in_image((jl_value_t*)codeinst);
             }
@@ -8534,10 +8543,17 @@
             // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
             if (cache_valid && invoke != NULL) {
                 auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+                if (fptr) {
+                    while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                        jl_cpu_pause();
+                    }
+                    // in case we are racing with another thread that is emitting this function
+                    invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                }
                 if (invoke == jl_fptr_args_addr) {
                     preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                 }
-                else if (codeinst->isspecsig) {
+                else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
                     preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                     preal_specsig = true;
                 }
diff --git a/src/gf.c b/src/gf.c
index 894a8a415e002..42990baf7ad24 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -414,13 +414,13 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
     if ((const_flags & 2) == 0)
         inferred_const = NULL;
     codeinst->rettype_const = inferred_const;
-    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
+    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
     if ((const_flags & 1) != 0) {
         assert(const_flags & 2);
         jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
     }
-    codeinst->isspecsig = 0;
+    jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
     jl_atomic_store_relaxed(&codeinst->precompile, 0);
     jl_atomic_store_relaxed(&codeinst->next, NULL);
     codeinst->ipo_purity_bits = ipo_effects;
@@ -2218,12 +2218,33 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 mi, codeinst2->rettype,
                 codeinst2->min_world, codeinst2->max_world);
             if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
-                // once set, don't change invoke-ptr, as that leads to race conditions
-                // with the (not) simultaneous updates to invoke and specptr
-                codeinst->isspecsig = codeinst2->isspecsig;
                 codeinst->rettype_const = codeinst2->rettype_const;
-                jl_atomic_store_release(&codeinst->specptr.fptr, jl_atomic_load_relaxed(&codeinst2->specptr.fptr));
-                jl_atomic_store_release(&codeinst->invoke, jl_atomic_load_relaxed(&codeinst2->invoke));
+                uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
+                jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
+                void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
+                if (fptr != NULL) {
+                    while (!(specsigflags & 0b10)) {
+                        jl_cpu_pause();
+                        specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
+                    }
+                    invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
+                    void *prev_fptr = NULL;
+                    // see jitlayers.cpp for the ordering restrictions here
+                    if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
+                        jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1);
+                        jl_atomic_store_release(&codeinst->invoke, invoke);
+                        jl_atomic_store_release(&codeinst->specsigflags, specsigflags);
+                    } else {
+                        // someone else already compiled it
+                        while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
+                            jl_cpu_pause();
+                        }
+                        // codeinst is now set up fully, safe to return
+                    }
+                } else {
+                    jl_callptr_t prev = NULL;
+                    jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
+                }
             }
             // don't call record_precompile_statement here, since we already compiled it as mi2 which is better
             return codeinst;
@@ -2248,14 +2269,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
         if (unspecmi) {
             jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
-            if (unspec && jl_atomic_load_acquire(&unspec->invoke)) {
+            jl_callptr_t unspec_invoke = NULL;
+            if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
                 jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                     (jl_value_t*)jl_any_type, NULL, NULL,
                     0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-                codeinst->isspecsig = 0;
-                codeinst->specptr = unspec->specptr;
+                void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
+                if (unspec_fptr) {
+                    // wait until invoke and specsigflags are properly set
+                    while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
+                        jl_cpu_pause();
+                    }
+                    unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
+                }
+                jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
                 codeinst->rettype_const = unspec->rettype_const;
-                jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke));
+                jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
                 jl_mi_cache_insert(mi, codeinst);
                 record_precompile_statement(mi);
                 return codeinst;
@@ -2272,7 +2301,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                 (jl_value_t*)jl_any_type, NULL, NULL,
                 0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-            jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call);
+            jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
             record_precompile_statement(mi);
             return codeinst;
@@ -2289,7 +2318,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
         jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
         jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
         // ask codegen to make the fptr for unspec
-        if (jl_atomic_load_acquire(&ucache->invoke) == NULL) {
+        jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
+        if (ucache_invoke == NULL) {
             if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
                                               jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
                 jl_printf(JL_STDERR, "source not available for ");
@@ -2298,19 +2328,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             jl_error("source missing for method that needs to be compiled");
         }
         jl_generate_fptr_for_unspecialized(ucache);
+        ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
     }
-    assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
-    if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
-        jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
+    assert(ucache_invoke != NULL);
+    if (ucache_invoke != jl_fptr_sparam &&
+        ucache_invoke != jl_fptr_interpret_call) {
         // only these care about the exact specTypes, otherwise we can use it directly
         return ucache;
     }
     codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
         0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
-    codeinst->isspecsig = 0;
-    codeinst->specptr = ucache->specptr;
+    void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
+    if (unspec_fptr) {
+        // wait until invoke and specsigflags are properly set
+        while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
+            jl_cpu_pause();
+        }
+        ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
+    }
+    // unspec is always not specsig, but might use specptr
+    jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
+    jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
     codeinst->rettype_const = ucache->rettype_const;
-    jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke));
+    jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
     jl_mi_cache_insert(mi, codeinst);
 }
 else {
@@ -2328,11 +2368,8 @@ jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t narg
 
 jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
-    while (1) {
-        if (invoke)
-            return invoke(f, args, nargs);
-        invoke = jl_atomic_load_acquire(&m->specptr.fptr1); // require forward progress with acquire annotation
-    }
+    assert(invoke && "Forgot to set specptr for jl_fptr_args!");
+    return invoke(f, args, nargs);
 }
 
@@ -2340,11 +2377,8 @@ jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_
     jl_svec_t *sparams = m->def->sparam_vals;
     assert(sparams != jl_emptysvec);
     jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3);
-    while (1) {
-        if (invoke)
-            return invoke(f, args, nargs, sparams);
-        invoke = jl_atomic_load_acquire(&m->specptr.fptr3); // require forward progress with acquire annotation
-    }
+    assert(invoke && "Forgot to set specptr for jl_fptr_sparam!");
+    return invoke(f, args, nargs, sparams);
 }
 
 JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
@@ -2667,7 +2701,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache);
     while (codeinst) {
         if (codeinst->min_world <= world && world <= codeinst->max_world) {
-            jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+            jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
             if (invoke != NULL) {
                 jl_value_t *res = invoke(F, args, nargs, codeinst);
                 return verify_type(res);
@@ -2687,7 +2721,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
     errno = last_errno;
     if (jl_options.malloc_log)
         jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
-    jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+    jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
     jl_value_t *res = invoke(F, args, nargs, codeinst);
     return verify_type(res);
 }
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index b744f9fcbd3f2..b489665f5629d 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -262,18 +262,31 @@ static jl_callptr_t _jl_compile_codeinst(
             addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
             isspecsig = true;
         }
-        if (jl_atomic_load_relaxed(&this_code->invoke) == NULL) {
-            // once set, don't change invoke-ptr, as that leads to race conditions
-            // with the (not) simultaneous updates to invoke and specptr
-            if (!decls.specFunctionObject.empty()) {
-                jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
-                this_code->isspecsig = isspecsig;
+        if (!decls.specFunctionObject.empty()) {
+            void *prev_specptr = NULL;
+            auto spec = (void*)getAddressForFunction(decls.specFunctionObject);
+            if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
+                // only set specsig and invoke if we were the first to set specptr
+                jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig);
+                // we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr
+                // either assumes that specptr was null, doesn't care about specptr,
+                // or will wait until specsigflags has 0b10 set before reloading invoke
+                jl_atomic_store_release(&this_code->invoke, addr);
+                jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig));
+            } else {
+                // someone else beat us, don't commit any results
+                while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) {
+                    jl_cpu_pause();
+                }
+                addr = jl_atomic_load_relaxed(&this_code->invoke);
+            }
+        } else {
+            jl_callptr_t prev_invoke = NULL;
+            if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
+                addr = prev_invoke;
+                // TODO: do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
+                // known lesser function)
             }
-            jl_atomic_store_release(&this_code->invoke, addr);
-        }
-        else if (jl_atomic_load_relaxed(&this_code->invoke) == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) {
-            // hack to export this pointer value to jl_dump_method_disasm
-            jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
         }
         if (this_code == codeinst)
             fptr = addr;
@@ -497,10 +510,9 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
             assert(src && jl_is_code_info(src));
             ++UnspecFPtrCount;
             _jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext());
-            if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
-                // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
-                jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr);
-            }
+            jl_callptr_t null = nullptr;
+            // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
+            jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
             JL_GC_POP();
         }
         JL_UNLOCK(&jl_codegen_lock); // Might GC
@@ -519,7 +531,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
     // printing via disassembly
     jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
     if (codeinst) {
-        uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
+        uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
         if (getwrapper)
             return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
         uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
@@ -547,7 +559,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
                     if (src && (jl_value_t*)src != jl_nothing)
                         src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
                 }
-                fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
+                fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
                 specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                 if (src && jl_is_code_info(src)) {
                     if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
diff --git a/src/julia.h b/src/julia.h
index 8c3332fe50e12..19dab5cd3a704 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -424,8 +424,7 @@ typedef struct _jl_code_instance_t {
     jl_value_t *argescapes; // escape information of call arguments
 
     // compilation state cache
-    // WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with the invoke pointers.
-    uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype
+    _Atomic(uint8_t) specsigflags; // & 0b1 == specptr is a specialized function signature for specTypes->rettype, & 0b10 == invokeptr matches specptr
     _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
     uint8_t relocatability;  // nonzero if all roots are built into sysimg or tagged by module key
     _Atomic(jl_callptr_t) invoke; // jlcall entry point
diff --git a/src/julia_atomics.h b/src/julia_atomics.h
index 4da2e4f7a9994..959491f1ac048 100644
--- a/src/julia_atomics.h
+++ b/src/julia_atomics.h
@@ -160,6 +160,11 @@ bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
 {
     return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, order, order);
 }
+template<class T, class S>
+bool jl_atomic_cmpswap_acqrel(std::atomic<T> *ptr, T *expected, S val)
+{
+    return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, memory_order_acq_rel, memory_order_acquire);
+}
 #define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
 template<class T, class S>
 T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
@@ -191,6 +196,8 @@ extern "C" {
         atomic_compare_exchange_strong(obj, expected, desired)
 # define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
         atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
+# define jl_atomic_cmpswap_acqrel(obj, expected, desired) \
+        atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_acq_rel, memory_order_acquire)
 // TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
 # define jl_atomic_exchange(obj, desired) \
         atomic_exchange(obj, desired)
@@ -251,6 +258,7 @@ extern "C" {
 #define jl_atomic_exchange_relaxed jl_atomic_exchange
 
 #undef jl_atomic_cmpswap
+#undef jl_atomic_cmpswap_acqrel
 #undef jl_atomic_cmpswap_relaxed
 #define jl_atomic_cmpswap(obj, expected, desired) \
     (__extension__({ \
@@ -264,6 +272,7 @@ extern "C" {
             *x__analyzer__ = temp__analyzer__; \
         eq__analyzer__; \
     }))
+#define jl_atomic_cmpswap_acqrel jl_atomic_cmpswap
 #define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
 
 #undef jl_atomic_store
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
index db596c2bb893f..6772290c8ab89 100644
--- a/src/opaque_closure.c
+++ b/src/opaque_closure.c
@@ -75,18 +75,23 @@ static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t
     oc->source = source;
     oc->captures = captures;
     oc->specptr = NULL;
-    if (!ci || jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_interpret_call) {
+    if (!ci) {
         oc->invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
-    }
-    else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_args) {
-        oc->invoke = jl_atomic_load_relaxed(&ci->specptr.fptr1);
-    }
-    else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) {
-        oc->invoke = (jl_fptr_args_t)jl_fptr_const_opaque_closure;
-        oc->captures = ci->rettype_const;
-    }
-    else {
-        oc->invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke);
+    } else {
+        jl_callptr_t invoke = jl_atomic_load_acquire(&ci->invoke);
+        if (invoke == jl_fptr_interpret_call) {
+            oc->invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+        }
+        else if (invoke == jl_fptr_args) {
+            oc->invoke = jl_atomic_load_relaxed(&ci->specptr.fptr1);
+        }
+        else if (invoke == jl_fptr_const_return) {
+            oc->invoke = (jl_fptr_args_t)jl_fptr_const_opaque_closure;
+            oc->captures = ci->rettype_const;
+        }
+        else {
+            oc->invoke = (jl_fptr_args_t) invoke;
+        }
     }
     oc->world = world;
     return oc;
diff --git a/src/staticdata.c b/src/staticdata.c
index c4ed2b531ae0f..9ae00b395a0e8 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -1376,7 +1376,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
 
                 }
                 newm->invoke = NULL;
-                newm->isspecsig = 0;
+                newm->specsigflags = 0;
                 newm->specptr.fptr = NULL;
                 int8_t fptr_id = JL_API_NULL;
                 int8_t builtin_id = 0;
@@ -1889,7 +1889,7 @@ static void jl_update_all_fptrs(jl_serializer_state *s, jl_image_t *image)
             void *fptr = (void*)(base + offset);
             if (specfunc) {
                 codeinst->specptr.fptr = fptr;
-                codeinst->isspecsig = 1; // TODO: set only if confirmed to be true
+                codeinst->specsigflags = 0b11; // TODO: set only if confirmed to be true
             }
             else {
                 codeinst->invoke = (jl_callptr_t)fptr;
@@ -1962,7 +1962,7 @@ static void jl_root_new_gvars(jl_serializer_state *s, jl_image_t *image, uint32_
             v = (uintptr_t)jl_as_global_root((jl_value_t*)v);
         } else {
            jl_code_instance_t *codeinst = (jl_code_instance_t*) v;
-           assert(codeinst && codeinst->isspecsig);
+           assert(codeinst && (codeinst->specsigflags & 0b01));
            v = (uintptr_t)codeinst->specptr.fptr;
         }
         *gv = v;
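---

Note on the synchronization protocol (reviewer's sketch, not part of the patch): the hunks above all cooperate in one lock-free publication scheme for jl_code_instance_t. A compiler thread wins a CAS on specptr; only the winner publishes, by storing the specsig bit (0b1) relaxed, release-storing invoke, and finally release-storing specsigflags with the "published" bit (0b10) set. Losing compilers and any reader that observes a non-NULL specptr spin with acquire loads until 0b10 appears, after which invoke and specsigflags are guaranteed to be the coherent pair the winner committed. The sketch below restates that handshake in plain C11 atomics for illustration only: the names code_inst_t, publish_code, and resolve_invoke are invented here and are not Julia's API, and the real code uses the jl_atomic_* wrappers and jl_cpu_pause() rather than a bare spin.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define SPECSIG   0x1  /* specptr uses the specialized calling convention (0b1 in the patch) */
#define PUBLISHED 0x2  /* invoke is consistent with specptr (0b10 in the patch) */

typedef void (*callptr_t)(void);

typedef struct {
    _Atomic(uint8_t)   specsigflags;
    _Atomic(callptr_t) invoke;
    _Atomic(void *)    specptr;
} code_inst_t;

/* Writer side (cf. _jl_compile_codeinst in jitlayers.cpp): the CAS on
 * specptr elects one winner; only the winner stores invoke, and the final
 * release store of PUBLISHED makes invoke and the flags visible together. */
static callptr_t publish_code(code_inst_t *ci, void *spec, callptr_t addr, int isspecsig)
{
    void *prev = NULL;
    if (atomic_compare_exchange_strong_explicit(&ci->specptr, &prev, spec,
            memory_order_acq_rel, memory_order_acquire)) {
        atomic_store_explicit(&ci->specsigflags, isspecsig ? SPECSIG : 0,
                              memory_order_relaxed);
        atomic_store_explicit(&ci->invoke, addr, memory_order_release);
        atomic_store_explicit(&ci->specsigflags,
                              (uint8_t)(PUBLISHED | (isspecsig ? SPECSIG : 0)),
                              memory_order_release);
        return addr;
    }
    /* Lost the race: wait for the winner to finish, then adopt its result. */
    while (!(atomic_load_explicit(&ci->specsigflags, memory_order_acquire) & PUBLISHED))
        ; /* the real code calls jl_cpu_pause() here */
    return atomic_load_explicit(&ci->invoke, memory_order_relaxed);
}

/* Reader side (cf. emit_invoke in codegen.cpp): if specptr is already set,
 * spin until PUBLISHED so the invoke/specsigflags pair read afterwards is
 * the one the winning writer committed, not a half-written intermediate. */
static callptr_t resolve_invoke(code_inst_t *ci)
{
    callptr_t invoke = atomic_load_explicit(&ci->invoke, memory_order_acquire);
    if (atomic_load_explicit(&ci->specptr, memory_order_relaxed) != NULL) {
        while (!(atomic_load_explicit(&ci->specsigflags, memory_order_acquire) & PUBLISHED))
            ; /* writer is between its specptr CAS and its PUBLISHED store */
        invoke = atomic_load_explicit(&ci->invoke, memory_order_relaxed);
    }
    return invoke;
}

The first flags store in publish_code can be relaxed because no reader is allowed to trust the SPECSIG bit before it has observed PUBLISHED; the final release store is the single publication point that every acquire spin loop synchronizes with, which is exactly why the patch can drop the old rule of never overwriting a non-NULL invoke pointer.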