Merge pull request #1640 from nwc10/C11-atomics
Add a new Configure.pl flag, --c11-atomics, which makes MoarVM use C11's
<stdatomic.h> for its atomic operations instead of the bundled libatomic_ops
library.

When C11 atomics are in use, building libatomic_ops is disabled entirely.

This permits MoarVM to build on architectures which libatomic_ops does not
support, as long as they have a current C compiler which implements this
optional C11 feature.

With this we can build MoarVM, NQP and Rakudo on the Loongson-3A5000
hardware on the GCC Compile Farm. This is a new architecture not supported
by libatomic_ops, but with a C compiler supporting C11 atomics.
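
A quick way to check a toolchain by hand before configuring with
`perl Configure.pl --c11-atomics`: C11 makes atomics optional, and a
conforming compiler that omits them must define __STDC_NO_ATOMICS__.
A minimal probe (illustrative only, not part of this commit):

    /* probe.c - compile with e.g. `cc -std=c11 probe.c`; fails where the
     * optional C11 atomics feature is missing. */
    #ifdef __STDC_NO_ATOMICS__
    #error "this compiler does not implement C11 atomics"
    #endif
    #include <stdatomic.h>

    int main(void) {
        atomic_uintptr_t a = 0;
        atomic_fetch_add_explicit(&a, 1, memory_order_relaxed);
        return (int)atomic_load(&a) - 1;   /* exits 0 when atomics work */
    }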
nwc10 committed Jan 18, 2022
2 parents 4b2de6d + 4469688 commit f884085
Showing 13 changed files with 152 additions and 28 deletions.
18 changes: 16 additions & 2 deletions Configure.pl
@@ -49,7 +49,7 @@ sub uniq {
prefix=s bindir=s libdir=s mastdir=s
relocatable make-install asan ubsan tsan
valgrind telemeh! dtrace show-autovect git-cache-dir=s
show-autovect-failed:s mimalloc!),
show-autovect-failed:s mimalloc! c11-atomics!),

'no-optimize|nooptimize' => sub { $args{optimize} = 0 },
'no-debug|nodebug' => sub { $args{debug} = 0 },
@@ -272,7 +272,12 @@ sub uniq {
push @hllincludes, 'libuv';
}

if ($args{'has-libatomic_ops'}) {
$config{use_c11_atomics} = $args{'c11-atomics'} ? 1 : 0;

if ($config{use_c11_atomics}) {
$defaults{-thirdparty}->{lao} = undef;
}
elsif ($args{'has-libatomic_ops'}) {
$defaults{-thirdparty}->{lao} = undef;
unshift @{$config{usrlibs}}, 'atomic_ops';
setup_native_library('atomic_ops') if $config{pkgconfig_works};
@@ -1126,6 +1131,15 @@ =head1 OPTIONS
C library's malloc always. Specify C<--mimalloc> to force use of mimalloc, even
if the probing thinks that it won't build.
=item --c11-atomics
=item --no-c11-atomics
Use C11 atomics instead of libatomic_ops for atomic operations. The default
is currently C<--no-c11-atomics>, i.e. use libatomic_ops. If you set
C<--c11-atomics> and your compiler does not support C11 atomics, your build
will fail.
=item --os <os>
Set the operating system name which you are compiling to.
17 changes: 17 additions & 0 deletions azure-pipelines.yml
@@ -50,6 +50,11 @@ stages:
RAKUDO_OPTIONS: '--relocatable'
NQP_OPTIONS: '--backends=moar --relocatable'
MOAR_OPTIONS: '--relocatable'
Mac_MVM_C11_atomics:
IMAGE_NAME: 'macOS-10.15'
RAKUDO_OPTIONS: ''
NQP_OPTIONS: '--backends=moar'
MOAR_OPTIONS: '--c11-atomics'

Lin_MVM:
IMAGE_NAME: 'ubuntu-20.04'
@@ -147,6 +152,18 @@ stages:
NQP_OPTIONS: '--backends=moar'
MOAR_OPTIONS: '--cc=clang --debug=3 --no-mimalloc'
CHECK_LEAKS: 'yes'
MVM_gcc_c11_atomics:
IMAGE_NAME: 'ubuntu-20.04'
RAKUDO_OPTIONS: ''
NQP_OPTIONS: '--backends=moar'
MOAR_OPTIONS: '--cc=gcc --debug=3 --c11-atomics'
CHECK_LEAKS: 'yes'
MVM_clang_c11_atomics:
IMAGE_NAME: 'ubuntu-20.04'
RAKUDO_OPTIONS: ''
NQP_OPTIONS: '--backends=moar'
MOAR_OPTIONS: '--cc=clang --debug=3 --c11-atomics'
CHECK_LEAKS: 'yes'

pool:
vmImage: $(IMAGE_NAME)
4 changes: 4 additions & 0 deletions build/config.h.in
@@ -109,6 +109,10 @@
#define MVM_USE_MIMALLOC
#endif

#if @use_c11_atomics@
#define MVM_USE_C11_ATOMICS
#endif

/* Should we translate \n to \r\n on output? */
#define MVM_TRANSLATE_NEWLINE_OUTPUT @translate_newline_output@

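
For context, Configure.pl substitutes `@use_c11_atomics@` with the 0/1 value
computed above, so the generated config.h from a `--c11-atomics` build would
read (illustrative):

    #if 1
    #define MVM_USE_C11_ATOMICS
    #endif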
32 changes: 16 additions & 16 deletions src/6model/containers.c
@@ -126,8 +126,8 @@ static const MVMContainerSpec code_pair_spec = {
NULL,
code_pair_can_store,
NULL, /* cas */
NULL, /* atomic_load */
NULL, /* atomic_store */
NULL, /* load_atomic */
NULL, /* store_atomic */
0
};

@@ -184,7 +184,7 @@ typedef struct {
MVMCode *store;
MVMCode *store_unchecked;
MVMCode *cas;
MVMCode *atomic_store;
MVMCode *store_atomic;

/* Retained for serialization purposes only. */
MVMObject *attrs_class;
@@ -277,7 +277,7 @@ static void value_desc_cont_gc_mark_data(MVMThreadContext *tc, MVMSTable *st, MV
MVM_gc_worklist_add(tc, worklist, &data->store);
MVM_gc_worklist_add(tc, worklist, &data->store_unchecked);
MVM_gc_worklist_add(tc, worklist, &data->cas);
MVM_gc_worklist_add(tc, worklist, &data->atomic_store);
MVM_gc_worklist_add(tc, worklist, &data->store_atomic);
MVM_gc_worklist_add(tc, worklist, &data->attrs_class);
MVM_gc_worklist_add(tc, worklist, &data->value_attr);
MVM_gc_worklist_add(tc, worklist, &data->descriptor_attr);
@@ -292,7 +292,7 @@ static void value_desc_cont_serialize(MVMThreadContext *tc, MVMSTable *st, MVMSe
MVM_serialization_write_ref(tc, writer, (MVMObject *)data->store);
MVM_serialization_write_ref(tc, writer, (MVMObject *)data->store_unchecked);
MVM_serialization_write_ref(tc, writer, (MVMObject *)data->cas);
MVM_serialization_write_ref(tc, writer, (MVMObject *)data->atomic_store);
MVM_serialization_write_ref(tc, writer, (MVMObject *)data->store_atomic);
MVM_serialization_write_ref(tc, writer, data->attrs_class);
MVM_serialization_write_str(tc, writer, data->value_attr);
MVM_serialization_write_str(tc, writer, data->descriptor_attr);
@@ -303,7 +303,7 @@ static void value_desc_cont_deserialize(MVMThreadContext *tc, MVMSTable *st, MVM
MVM_ASSIGN_REF(tc, &(st->header), data->store, MVM_serialization_read_ref(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->store_unchecked, MVM_serialization_read_ref(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->cas, MVM_serialization_read_ref(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->atomic_store, MVM_serialization_read_ref(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->store_atomic, MVM_serialization_read_ref(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->attrs_class, MVM_serialization_read_ref(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->value_attr, MVM_serialization_read_str(tc, reader));
MVM_ASSIGN_REF(tc, &(st->header), data->descriptor_attr, MVM_serialization_read_str(tc, reader));
@@ -359,7 +359,7 @@ static void value_desc_cont_atomic_store(MVMThreadContext *tc, MVMObject *cont,
MVM_callsite_get_common(tc, MVM_CALLSITE_ID_OBJ_OBJ));
args_record->args.source[0].o = cont;
args_record->args.source[1].o = value;
MVM_frame_dispatch_from_c(tc, data->atomic_store, args_record, NULL, MVM_RETURN_VOID);
MVM_frame_dispatch_from_c(tc, data->store_atomic, args_record, NULL, MVM_RETURN_VOID);
}

static const MVMContainerSpec value_desc_cont_spec = {
@@ -424,7 +424,7 @@ static void value_desc_cont_configure_container_spec(MVMThreadContext *tc, MVMST
value = grab_one_value(tc, config, "atomic_store");
if (!MVM_code_iscode(tc, value))
MVM_exception_throw_adhoc(tc, "Container spec must be configured with a code handle");
MVM_ASSIGN_REF(tc, &(st->header), data->atomic_store, value);
MVM_ASSIGN_REF(tc, &(st->header), data->store_atomic, value);
value = grab_one_value(tc, config, "attrs_class");
MVM_ASSIGN_REF(tc, &(st->header), data->attrs_class, value);
value = grab_one_value(tc, config, "value_attr");
@@ -704,8 +704,8 @@ static const MVMContainerSpec native_ref_spec = {
NULL,
native_ref_can_store,
NULL, /* cas */
NULL, /* atomic_load */
NULL, /* atomic_store */
NULL, /* load_atomic */
NULL, /* store_atomic */
1
};

@@ -989,8 +989,8 @@ MVMObject * MVM_6model_container_atomic_load(MVMThreadContext *tc, MVMObject *co
if (IS_CONCRETE(cont)) {
MVMContainerSpec const *cs = cont->st->container_spec;
if (cs) {
if (cs->atomic_load)
return cs->atomic_load(tc, cont);
if (cs->load_atomic)
return cs->load_atomic(tc, cont);
else
MVM_exception_throw_adhoc(tc,
"A %s container does not know how to do an atomic load",
@@ -1013,8 +1013,8 @@ void MVM_6model_container_atomic_store(MVMThreadContext *tc, MVMObject *cont, MV
if (IS_CONCRETE(cont)) {
MVMContainerSpec const *cs = cont->st->container_spec;
if (cs) {
if (cs->atomic_store)
cs->atomic_store(tc, cont, value);
if (cs->store_atomic)
cs->store_atomic(tc, cont, value);
else
MVM_exception_throw_adhoc(tc,
"A %s container does not know how to do an atomic store",
@@ -1057,7 +1057,7 @@ static AO_t * native_ref_as_atomic_i(MVMThreadContext *tc, MVMObject *cont) {

MVMint64 MVM_6model_container_cas_i(MVMThreadContext *tc, MVMObject *cont,
MVMint64 expected, MVMint64 value) {
return (MVMint64)MVM_cas(native_ref_as_atomic_i(tc, cont), (AO_t)expected, (AO_t)value);
return (MVMint64)MVM_cas(native_ref_as_atomic_i(tc, cont), AO_CAST(expected), AO_CAST(value));
}

MVMint64 MVM_6model_container_atomic_load_i(MVMThreadContext *tc, MVMObject *cont) {
@@ -1077,5 +1077,5 @@ MVMint64 MVM_6model_container_atomic_dec(MVMThreadContext *tc, MVMObject *cont)
}

MVMint64 MVM_6model_container_atomic_add(MVMThreadContext *tc, MVMObject *cont, MVMint64 value) {
return (MVMint64)MVM_add(native_ref_as_atomic_i(tc, cont), (AO_t)value);
return (MVMint64)MVM_add(native_ref_as_atomic_i(tc, cont), AO_CAST(value));
}
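
Two recurring edits in this file deserve a note. The container spec members
are renamed from `atomic_load`/`atomic_store` to `load_atomic`/`store_atomic`
because <stdatomic.h> provides generics with exactly those names (typically
as function-like macros), so a call spelled `cs->atomic_load(tc, cont)` could
be mangled by the preprocessor once the header is included. Likewise
`(AO_t)(v)` becomes `AO_CAST(v)` because under C11 `AO_t` is an atomic type,
and clang rejects the cast (see the comment added to src/moar.h below). A
minimal sketch of both pitfalls, with hypothetical names:

    #include <stdatomic.h>
    #include <stdint.h>

    typedef atomic_uintptr_t AO_t;

    struct spec {
        /* Naming this member "atomic_load" would collide with the C11
         * generic of the same name at the call site below. */
        uintptr_t (*load_atomic)(void);
    };

    static uintptr_t load_one(void) { return 1; }

    int main(void) {
        struct spec cs = { load_one };
        uintptr_t v = cs.load_atomic();   /* fine */
        /* AO_t bad = (AO_t)v; */         /* clang rejects a cast to an atomic type */
        uintptr_t ok = (uintptr_t)v;      /* what AO_CAST expands to under C11 */
        return (int)(v - ok);             /* 0 */
    }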
4 changes: 2 additions & 2 deletions src/6model/containers.h
@@ -57,8 +57,8 @@ struct MVMContainerSpec {
* operation, and atomic store operation. */
void (*cas) (MVMThreadContext *tc, MVMObject *cont, MVMObject *expected,
MVMObject *value, MVMRegister *result);
MVMObject * (*atomic_load) (MVMThreadContext *tc, MVMObject *cont);
void (*atomic_store) (MVMThreadContext *tc, MVMObject *cont, MVMObject *value);
MVMObject * (*load_atomic) (MVMThreadContext *tc, MVMObject *cont);
void (*store_atomic) (MVMThreadContext *tc, MVMObject *cont, MVMObject *value);

/* Set this to a non-zero value if a fetch promises to never invoke any
* code. This means the VM knows it can safely decontainerize in places
4 changes: 2 additions & 2 deletions src/core/interp.c
@@ -6223,15 +6223,15 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex
}
OP(sp_atomicload_o): {
MVMObject *target = GET_REG(cur_op, 2).o;
GET_REG(cur_op, 0).o = target->st->container_spec->atomic_load(tc, target);
GET_REG(cur_op, 0).o = target->st->container_spec->load_atomic(tc, target);
cur_op += 4;
goto NEXT;
}
OP(sp_atomicstore_o): {
MVMObject *target = GET_REG(cur_op, 0).o;
MVMObject *value = GET_REG(cur_op, 2).o;
cur_op += 4;
target->st->container_spec->atomic_store(tc, target, value);
target->st->container_spec->store_atomic(tc, target, value);
goto NEXT;
}
OP(sp_add_I): {
2 changes: 1 addition & 1 deletion src/core/threads.c
@@ -278,7 +278,7 @@ MVMint32 MVM_thread_cleanup_threads_list(MVMThreadContext *tc, MVMThread **head)
*head = NULL;
while (this) {
next = this->body.next;
switch (this->body.stage) {
switch (AO_READ(this->body.stage)) {
case MVM_thread_stage_starting:
case MVM_thread_stage_waiting:
case MVM_thread_stage_started:
2 changes: 1 addition & 1 deletion src/gc/orchestrate.c
@@ -29,7 +29,7 @@ static MVMuint32 signal_one_thread(MVMThreadContext *tc, MVMThreadContext *to_si
unsigned int had_suspend_request = 0;
while (1) {
AO_t current = MVM_load(&to_signal->gc_status);
switch (current) {
switch (AO_READ(current)) {
case MVMGCStatus_NONE:
/* Try to set it from running to interrupted - the common case. */
if (MVM_cas(&to_signal->gc_status, MVMGCStatus_NONE,
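
This change and the matching one in src/core/threads.c wrap a switch operand
in `AO_READ`. Under the C11 build `AO_t` is an atomic type, and (per the
comment added to src/moar.h below) clang will not implicitly convert an
atomic value where gcc will, so the load is made explicit, and relaxed. A
standalone sketch of the pattern (names are illustrative):

    #include <stdatomic.h>

    typedef atomic_uintptr_t AO_t;
    /* Relaxed explicit load, as defined in src/moar.h by this commit. */
    #define AO_READ(v) atomic_load_explicit(&(v), memory_order_relaxed)

    static AO_t stage;

    int stage_is_initial(void) {
        switch (AO_READ(stage)) {   /* plain `switch (stage)` trips up clang */
            case 0:  return 1;
            default: return 0;
        }
    }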
9 changes: 9 additions & 0 deletions src/jit/compile.c
@@ -258,14 +258,23 @@ MVMJitCode * MVM_jit_compiler_assemble(MVMThreadContext *tc, MVMJitCompiler *cl,
}

MVMJitCode* MVM_jit_code_copy(MVMThreadContext *tc, MVMJitCode * const code) {
#ifdef MVM_USE_C11_ATOMICS
atomic_fetch_add_explicit(&code->ref_cnt, 1, memory_order_relaxed);
#else
AO_fetch_and_add1(&code->ref_cnt);
#endif
return code;
}

void MVM_jit_code_destroy(MVMThreadContext *tc, MVMJitCode *code) {
/* fetch_and_sub1 returns previous value, so check if there's only 1 reference */
#ifdef MVM_USE_C11_ATOMICS
if (atomic_fetch_sub_explicit(&code->ref_cnt, 1, memory_order_relaxed) > 1)
return;
#else
if (AO_fetch_and_sub1(&code->ref_cnt) > 1)
return;
#endif
MVM_platform_free_pages(code->func_ptr, code->size);
MVM_free(code->labels);
MVM_free(code->deopts);
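
The refcount idiom is the same in both branches: fetch-and-subtract returns
the value the counter held before the decrement, so a previous value of 1
means this call just released the last reference. A self-contained sketch
(names are illustrative, not MoarVM's):

    #include <stdatomic.h>
    #include <stdlib.h>

    typedef struct {
        atomic_uintptr_t ref_cnt;
        /* ... payload ... */
    } code_t;

    static void code_release(code_t *code) {
        /* Pre-decrement value > 1 means other references are still live. */
        if (atomic_fetch_sub_explicit(&code->ref_cnt, 1, memory_order_relaxed) > 1)
            return;
        free(code);
    }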
4 changes: 2 additions & 2 deletions src/jit/x64/emit.dasc
@@ -2214,7 +2214,7 @@ void MVM_jit_emit_primitive(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJ
| mov ARG2, aword WORK[target];
| mov FUNCTION, OBJECT:ARG2->st;
| mov FUNCTION, STABLE:FUNCTION->container_spec;
| mov FUNCTION, CONTAINERSPEC:FUNCTION->atomic_load;
| mov FUNCTION, CONTAINERSPEC:FUNCTION->load_atomic;
| call FUNCTION;
| mov WORK[result], RV
break;
@@ -2227,7 +2227,7 @@ void MVM_jit_emit_primitive(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJ
| mov ARG3, aword WORK[value];
| mov FUNCTION, OBJECT:ARG2->st;
| mov FUNCTION, STABLE:FUNCTION->container_spec;
| mov FUNCTION, CONTAINERSPEC:FUNCTION->atomic_store;
| mov FUNCTION, CONTAINERSPEC:FUNCTION->store_atomic;
| call FUNCTION;
break;
}
80 changes: 80 additions & 0 deletions src/moar.h
@@ -40,9 +40,42 @@
*/
#include <uv.h>

#ifdef MVM_USE_C11_ATOMICS
#include <stdatomic.h>
typedef atomic_uintptr_t AO_t;

/* clang and gcc disagree on rvalue semantics of atomic types:
* clang refuses to implicitly assign the value of an atomic variable to a
* regular non-atomic type, whereas gcc permits reading atomics as normal
* variables. Hence we need the following for clang.
*
* We can also use `atomic_load_explicit` on gcc, which keeps our code simpler.
* Curiously the effect is different on different architectures. (This might be
* a gcc bug, or just ambiguity in the C standard).
* Using `atomic_load_explicit` instead of a simple read has these changes:
*
* * sparc64 removes `membar` instructions
* * arm64 changes from LDAR to LDR (LDAR has load-acquire semantics)
*
* but x86_64 and ppc64 are unchanged.
*
* suggesting that the former platforms are treating the implicit read as
* `memory_order_seq_cst` but the latter as `memory_order_relaxed`.
*
* The latter is what we want, and is cheaper, so be explicit.
*/
#define AO_READ(v) atomic_load_explicit(&(v), memory_order_relaxed)

/* clang also refuses to cast as (AO_t)(v), but doing this works for gcc and
* clang (and hopefully other compilers, when we get there) */
#define AO_CAST(v) (uintptr_t)(v)
#else
/* libatomic_ops */
#define AO_REQUIRE_CAS
#include <atomic_ops.h>
#define AO_READ(v) (v)
#define AO_CAST(v) (AO_t)(v)
#endif

/* libffi or dynload/dyncall/dyncallback */
#ifdef HAVE_LIBFFI
@@ -287,6 +320,51 @@ MVM_PUBLIC int MVM_exepath(char* buffer, size_t* size);
MVM_PUBLIC int MVM_set_std_handles_to_nul(void);
#endif

#ifdef MVM_USE_C11_ATOMICS

#define MVM_incr(addr) atomic_fetch_add((volatile AO_t *)(addr), 1)
#define MVM_decr(addr) atomic_fetch_sub((volatile AO_t *)(addr), 1)
#define MVM_add(addr, add) atomic_fetch_add((volatile AO_t *)(addr), (add))

/* Returns non-zero for success. Use for both AO_t numbers and pointers. */
MVM_STATIC_INLINE int
MVM_trycas_AO(volatile AO_t *addr, uintptr_t old, const uintptr_t new) {
return atomic_compare_exchange_strong(addr, &old, new);
}
#define MVM_trycas(addr, old, new) MVM_trycas_AO((volatile AO_t *)(addr), AO_CAST(old), AO_CAST(new))


/* Returns the old value dereferenced at addr.
* Strictly, as libatomic_ops documents it:
* Atomically compare *addr to old_val, and replace *addr by new_val
* if the first comparison succeeds; returns the original value of *addr;
* cannot fail spuriously.
*/
MVM_STATIC_INLINE uintptr_t
MVM_cas(volatile AO_t *addr, uintptr_t old, const uintptr_t new) {
/* If *addr == old then { does exchange, returns true }
* else { writes old value to &old, returns false }
* Hence if exchange happens, we return the old value because C11 doesn't
* overwrite &old. If exchange doesn't happen, C11 does overwrite. */
atomic_compare_exchange_strong(addr, &old, new);
return old;
}

/* Returns the old pointer value dereferenced at addr. Provided for a tiny bit of type safety. */
#define MVM_casptr(addr, old, new) ((void *)MVM_cas((AO_t *)(addr), (uintptr_t)(old), (uintptr_t)(new)))

/* Full memory barrier. */
#define MVM_barrier() atomic_thread_fence(memory_order_seq_cst)

/* Need to use these to assign to or read from any memory locations on
* which the other atomic operation macros are used... */
#define MVM_store(addr, new) atomic_store((volatile AO_t *)(addr), AO_CAST(new))
#define MVM_load(addr) atomic_load((volatile AO_t *)(addr))

#else

/* libatomic_ops */

/* Seems that both 32 and 64 bit sparc need this crutch */
#if defined(__s390__) || defined(__sparc__)
AO_t AO_fetch_compare_and_swap_emulation(volatile AO_t *addr, AO_t old_val, AO_t new_val);
@@ -315,3 +393,5 @@ AO_t AO_fetch_compare_and_swap_emulation(volatile AO_t *addr, AO_t old_val, AO_t
* which the other atomic operation macros are used... */
#define MVM_store(addr, new) AO_store_full((volatile AO_t *)(addr), (AO_t)(new))
#define MVM_load(addr) AO_load_full((volatile AO_t *)(addr))

#endif
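
The C11 half of this header keeps libatomic_ops' two CAS flavours:
`MVM_trycas` reports success, while `MVM_cas` returns the prior value, which
works because C11 only overwrites `old` when the exchange fails. A
hypothetical caller, shown purely to contrast the two:

    #include "moar.h"

    static void cas_flavours(void) {
        AO_t flag;
        MVM_store(&flag, 0);

        /* MVM_trycas: non-zero iff the swap happened. */
        if (MVM_trycas(&flag, 0, 1)) {
            /* flag went 0 -> 1 */
        }

        /* MVM_cas: returns the value *addr held beforehand; the swap
         * happened iff that equals the expected value. */
        if (MVM_cas(&flag, AO_CAST(1), AO_CAST(2)) == 1) {
            /* flag went 1 -> 2 */
        }
    }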
2 changes: 1 addition & 1 deletion src/profiler/configuration.c
@@ -988,7 +988,7 @@ MVMint64 MVM_confprog_run(MVMThreadContext *tc, void *subject, MVMuint8 entrypoi
stats_slot = stats_position_for_value(NULL, entrypoint, result);

if (stats_slot != -1) {
MVM_store(&prog->last_return_time[stats_slot], (AO_t)MVM_proc_time(tc));
MVM_store(&prog->last_return_time[stats_slot], AO_CAST(MVM_proc_time(tc)));
MVM_incr(&prog->return_counts[stats_slot]);
}

