Skip to content

Commit

Permalink
[runtime] Add write barrier to end of JIT info table changes
Browse files Browse the repository at this point in the history
The MERP stress test often catches a crash like so:

```
 * frame #0: 0x0000000103409827 mono-sgen`mono_get_hazardous_pointer(pp=0x00007fc559a1a1f8, hp=0x00000001037e9600, hazard_index=1) at hazard-pointer.c:208
    frame mono#1: 0x000000010320eb8e mono-sgen`jit_info_table_chunk_index(chunk=0x00007fc4186060a0, hp=0x00000001037e9600, addr=0x00007fff73bffb66) at jit-info.c:200
    frame mono#2: 0x000000010320d7fa mono-sgen`jit_info_table_find(table=0x00007fc4186057c0, hp=0x00000001037e9600, addr=0x00007fff73bffb66) at jit-info.c:222
    frame mono#3: 0x000000010320d5e6 mono-sgen`mono_jit_info_table_find_internal(domain=0x00007fc418512bc0, addr=0x00007fff73bffb66, try_aot=1, allow_trampolines=1) at jit-info.c:293
    frame mono#4: 0x00000001030d67af mono-sgen`sigabrt_signal_handler(_dummy=6, _info=0x00007ffeecd4ea28, context=0x00007ffeecd4ea90) at mini-posix.c:219
    frame mono#5: 0x00007fff73dbdf5a libsystem_platform.dylib`_sigtramp + 26
    frame mono#6: 0x00007fff73bffb67 libsystem_kernel.dylib`__pthread_kill + 11
    frame mono#7: 0x00007fff73dca080 libsystem_pthread.dylib`pthread_kill + 333
    frame mono#8: 0x00007fff73b5b1ae libsystem_c.dylib`abort + 127
    frame mono#9: 0x00007fff73c59822 libsystem_malloc.dylib`free + 521
    frame mono#10: 0x0000000103c7d9d4 libtest.0.dylib`monoeg_g_free(ptr=0x00007ffeecd4ec34) at gmem.c:86
    frame mono#11: 0x0000000103c7733b libtest.0.dylib`mono_test_MerpCrashMalloc at libtest.c:7710
    frame mono#12: 0x0000000103c5c288
    frame mono#13: 0x000000010385dfd1
    frame mono#14: 0x0000000102ec85c3 mono-sgen`mono_jit_runtime_invoke(method=0x00007fc418514288, obj=0x0000000000000000, params=0x00007ffeecd4f180, exc=0x00007ffeecd4ef08, error=0x00007ffeecd4f250) at mini-runtime.c:3215
    frame mono#15: 0x000000010326f39d mono-sgen`do_runtime_invoke(method=0x00007fc418514288, obj=0x0000000000000000, params=0x00007ffeecd4f180, exc=0x0000000000000000, error=0x00007ffeecd4f250) at object.c:2977
    frame mono#16: 0x0000000103267c61 mono-sgen`mono_runtime_invoke_checked(method=0x00007fc418514288, obj=0x0000000000000000, params=0x00007ffeecd4f180, error=0x00007ffeecd4f250) at object.c:3145
    frame mono#17: 0x0000000103274d58 mono-sgen`do_exec_main_checked(method=0x00007fc418514288, args=0x00000001040003e8, error=0x00007ffeecd4f250) at object.c:5042
    frame mono#18: 0x0000000103272b03 mono-sgen`mono_runtime_exec_main_checked(method=0x00007fc418514288, args=0x00000001040003e8, error=0x00007ffeecd4f250) at object.c:5138
    frame mono#19: 0x0000000103272b56 mono-sgen`mono_runtime_run_main_checked(method=0x00007fc418514288, argc=2, argv=0x00007ffeecd4f760, error=0x00007ffeecd4f250) at object.c:4599
    frame mono#20: 0x0000000102f78bff mono-sgen`mono_jit_exec_internal(domain=0x00007fc418512bc0, assembly=0x00007fc4185445f0, argc=2, argv=0x00007ffeecd4f760) at driver.c:1298
    frame mono#21: 0x0000000102f78a2d mono-sgen`mono_jit_exec(domain=0x00007fc418512bc0, assembly=0x00007fc4185445f0, argc=2, argv=0x00007ffeecd4f760) at driver.c:1257
    frame mono#22: 0x0000000102f7d60f mono-sgen`main_thread_handler(user_data=0x00007ffeecd4f6a0) at driver.c:1375
    frame mono#23: 0x0000000102f7b8dd mono-sgen`mono_main(argc=3, argv=0x00007ffeecd4f758) at driver.c:2551
    frame mono#24: 0x0000000102eb1e4e mono-sgen`mono_main_with_options(argc=3, argv=0x00007ffeecd4f758) at main.c:50
    frame mono#25: 0x0000000102eb145d mono-sgen`main(argc=3, argv=0x00007ffeecd4f758) at main.c:406
    frame mono#26: 0x00007fff73aaf015 libdyld.dylib`start + 1
    frame mono#27: 0x00007fff73aaf015 libdyld.dylib`start + 1
```

because of

```
(lldb) p table->num_chunks
(int) $24 = 6
(lldb) p table->chunks [0]->num_elements
(volatile int) $18 = 50
(lldb) p table->chunks [1]->num_elements
(volatile int) $19 = 48
(lldb) p table->chunks [2]->num_elements
(volatile int) $20 = 48
(lldb) p table->chunks [3]->num_elements
(volatile int) $21 = 32
(lldb) p table->chunks [4]->num_elements
(volatile int) $23 = 32
(lldb) p table->chunks [5]->num_elements
(volatile int) $22 = 1347440720
```
.

It reproduces very often. Adding assertions / memory fences at the end
of the add/remove functions seems to remove this crash.

Looking at the code in the add function, I can picture how a missing
fence right at the end could lead to what we were seeing.

Not 100% happy with this fix, as it's going to have a perf cost. We have
an earlier fence that might be able to be coalesced into this one, but
the logic as-is makes that look unsafe.
  • Loading branch information
alexanderkyte committed Jan 7, 2019
1 parent 711a618 commit 6adba61
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 23 deletions.
2 changes: 1 addition & 1 deletion mono/metadata/domain-internals.h
Expand Up @@ -63,7 +63,7 @@ typedef struct _MonoJitInfoTableChunk MonoJitInfoTableChunk;
struct _MonoJitInfoTableChunk
{
int refcount;
volatile int num_elements;
volatile gint32 num_elements;
volatile gint8 *last_code_end;
MonoJitInfo *next_tombstone;
MonoJitInfo * volatile data [MONO_JIT_INFO_TABLE_CHUNK_SIZE];
Expand Down
60 changes: 39 additions & 21 deletions mono/metadata/jit-info.c
Expand Up @@ -170,13 +170,15 @@ mono_jit_info_table_free (MonoJitInfoTable *table)
static int
jit_info_table_index (MonoJitInfoTable *table, gint8 *addr)
{
mono_memory_barrier ();
int left = 0, right = table->num_chunks;

g_assert (left < right);

do {
int pos = (left + right) / 2;
MonoJitInfoTableChunk *chunk = table->chunks [pos];
g_assert (table->num_chunks <= MONO_JIT_INFO_TABLE_CHUNK_SIZE);

if (addr < chunk->last_code_end)
right = pos;
Expand All @@ -193,7 +195,7 @@ jit_info_table_index (MonoJitInfoTable *table, gint8 *addr)
static int
jit_info_table_chunk_index (MonoJitInfoTableChunk *chunk, MonoThreadHazardPointers *hp, gint8 *addr)
{
int left = 0, right = chunk->num_elements;
int left = 0, right = mono_atomic_load_i32 (&chunk->num_elements);

while (left < right) {
int pos = (left + right) / 2;
Expand Down Expand Up @@ -229,7 +231,7 @@ jit_info_table_find (MonoJitInfoTable *table, MonoThreadHazardPointers *hp, gint
do {
MonoJitInfoTableChunk *chunk = table->chunks [chunk_pos];

while (pos < chunk->num_elements) {
while (pos < mono_atomic_load_i32 (&chunk->num_elements)) {
ji = (MonoJitInfo *)mono_get_hazardous_pointer ((gpointer volatile*)&chunk->data [pos], hp, JIT_INFO_HAZARD_INDEX);

++pos;
Expand Down Expand Up @@ -347,27 +349,27 @@ jit_info_table_check (MonoJitInfoTable *table)
g_assert (chunk->refcount > 0 /* && chunk->refcount <= 8 */);
if (chunk->refcount > 10)
printf("warning: chunk refcount is %d\n", chunk->refcount);
g_assert (chunk->num_elements <= MONO_JIT_INFO_TABLE_CHUNK_SIZE);
g_assert (mono_atomic_load_i32 (&chunk->num_elements) <= MONO_JIT_INFO_TABLE_CHUNK_SIZE);

for (j = 0; j < chunk->num_elements; ++j) {
for (j = 0; j < mono_atomic_load_i32 (&chunk->num_elements); ++j) {
MonoJitInfo *this_ji = chunk->data [j];
MonoJitInfo *next;

g_assert ((gint8*)this_ji->code_start + this_ji->code_size <= chunk->last_code_end);

if (j < chunk->num_elements - 1)
if (j < mono_atomic_load_i32 (&chunk->num_elements) - 1)
next = chunk->data [j + 1];
else if (i < table->num_chunks - 1) {
int k;

for (k = i + 1; k < table->num_chunks; ++k)
if (table->chunks [k]->num_elements > 0)
if (mono_atomic_load_i32 (&table->chunks [k]->num_elements) > 0)
break;

if (k >= table->num_chunks)
return;

g_assert (table->chunks [k]->num_elements > 0);
g_assert (mono_atomic_load_i32 (&table->chunks [k]->num_elements) > 0);
next = table->chunks [k]->data [0];
} else
return;
Expand Down Expand Up @@ -408,15 +410,18 @@ jit_info_table_realloc (MonoJitInfoTable *old)
new_element = 0;
for (i = 0; i < old->num_chunks; ++i) {
MonoJitInfoTableChunk *chunk = old->chunks [i];
int chunk_num_elements = chunk->num_elements;
int chunk_num_elements = mono_atomic_load_i32 (&chunk->num_elements);
int j;

for (j = 0; j < chunk_num_elements; ++j) {
if (!IS_JIT_INFO_TOMBSTONE (chunk->data [j])) {
g_assert (new_chunk < num_chunks);
result->chunks [new_chunk]->data [new_element] = chunk->data [j];
if (++new_element >= JIT_INFO_TABLE_FILLED_NUM_ELEMENTS) {
result->chunks [new_chunk]->num_elements = new_element;

g_assert (new_element < 30000);
mono_atomic_store_i32 (&result->chunks [new_chunk]->num_elements, new_element);

++new_chunk;
new_element = 0;
}
Expand All @@ -426,17 +431,22 @@ jit_info_table_realloc (MonoJitInfoTable *old)

if (new_chunk < num_chunks) {
g_assert (new_chunk == num_chunks - 1);
result->chunks [new_chunk]->num_elements = new_element;
g_assert (result->chunks [new_chunk]->num_elements > 0);

g_assert (new_element < 30000);
mono_atomic_store_i32 (&result->chunks [new_chunk]->num_elements, new_element);
g_assert (new_element > 0);
}

for (i = 0; i < num_chunks; ++i) {
MonoJitInfoTableChunk *chunk = result->chunks [i];
MonoJitInfo *ji = chunk->data [chunk->num_elements - 1];
MonoJitInfo *ji = chunk->data [mono_atomic_load_i32 (&chunk->num_elements) - 1];

result->chunks [i]->last_code_end = (gint8*)ji->code_start + ji->code_size;
}

for (int i=0; i < result->num_chunks; i++) {
g_assert (mono_atomic_load_i32 (&result->chunks [i]->num_elements) < 30000);
}
return result;
}

Expand All @@ -446,10 +456,12 @@ jit_info_table_split_chunk (MonoJitInfoTableChunk *chunk, MonoJitInfoTableChunk
MonoJitInfoTableChunk *new1 = jit_info_table_new_chunk ();
MonoJitInfoTableChunk *new2 = jit_info_table_new_chunk ();

g_assert (chunk->num_elements == MONO_JIT_INFO_TABLE_CHUNK_SIZE);
g_assert (mono_atomic_load_i32 (&chunk->num_elements) == MONO_JIT_INFO_TABLE_CHUNK_SIZE);

new1->num_elements = MONO_JIT_INFO_TABLE_CHUNK_SIZE / 2;
new2->num_elements = MONO_JIT_INFO_TABLE_CHUNK_SIZE - new1->num_elements;
g_assert (new1->num_elements < 300000);
g_assert (new2->num_elements < 300000);

memcpy ((void*)new1->data, (void*)chunk->data, sizeof (MonoJitInfo*) * new1->num_elements);
memcpy ((void*)new2->data, (void*)(chunk->data + new1->num_elements), sizeof (MonoJitInfo*) * new2->num_elements);
Expand Down Expand Up @@ -498,12 +510,15 @@ jit_info_table_purify_chunk (MonoJitInfoTableChunk *old)
int i, j;

j = 0;
for (i = 0; i < old->num_elements; ++i) {
int num_elements = mono_atomic_load_i32 (&old->num_elements);
for (i = 0; i < num_elements; ++i) {
if (!IS_JIT_INFO_TOMBSTONE (old->data [i]))
result->data [j++] = old->data [i];
}

result->num_elements = j;
g_assert (j < 300000);

if (result->num_elements > 0)
result->last_code_end = (gint8*)result->data [j - 1]->code_start + result->data [j - 1]->code_size;
else
Expand Down Expand Up @@ -603,14 +618,14 @@ jit_info_table_add (MonoDomain *domain, MonoJitInfoTable *volatile *table_ptr, M
int num_elements;
int i;

table = *table_ptr;
table = (MonoJitInfoTable *) mono_atomic_load_ptr ((volatile gpointer *) table_ptr);

restart:
chunk_pos = jit_info_table_index (table, (gint8*)ji->code_start + ji->code_size);
g_assert (chunk_pos < table->num_chunks);
chunk = table->chunks [chunk_pos];

if (chunk->num_elements >= MONO_JIT_INFO_TABLE_CHUNK_SIZE) {
if (mono_atomic_load_i32 (&chunk->num_elements) >= MONO_JIT_INFO_TABLE_CHUNK_SIZE) {
MonoJitInfoTable *new_table = jit_info_table_chunk_overflow (table, chunk);

/* Debugging code, should be removed. */
Expand All @@ -628,7 +643,7 @@ jit_info_table_add (MonoDomain *domain, MonoJitInfoTable *volatile *table_ptr, M
/* Debugging code, should be removed. */
//jit_info_table_check (table);

num_elements = chunk->num_elements;
num_elements = mono_atomic_load_i32 (&chunk->num_elements);

pos = jit_info_table_chunk_index (chunk, NULL, (gint8*)ji->code_start + ji->code_size);

Expand All @@ -640,7 +655,8 @@ jit_info_table_add (MonoDomain *domain, MonoJitInfoTable *volatile *table_ptr, M
else
chunk->data [0] = ji;
mono_memory_write_barrier ();
chunk->num_elements = ++num_elements;
mono_atomic_store_i32 (&chunk->num_elements, ++num_elements);
g_assert (num_elements < 300000);

/* Shift the elements up one by one. */
for (i = num_elements - 2; i >= pos; --i) {
Expand All @@ -653,13 +669,14 @@ jit_info_table_add (MonoDomain *domain, MonoJitInfoTable *volatile *table_ptr, M
chunk->data [pos] = ji;

/* Set the high code end address chunk entry. */
chunk->last_code_end = (gint8*)chunk->data [chunk->num_elements - 1]->code_start
+ chunk->data [chunk->num_elements - 1]->code_size;
chunk->last_code_end = (gint8*)chunk->data [mono_atomic_load_i32 (&chunk->num_elements) - 1]->code_start
+ chunk->data [mono_atomic_load_i32 (&chunk->num_elements) - 1]->code_size;

++table->num_valid;

/* Debugging code, should be removed. */
//jit_info_table_check (table);
mono_memory_write_barrier ();
}

void
Expand Down Expand Up @@ -727,7 +744,7 @@ jit_info_table_remove (MonoJitInfoTable *table, MonoJitInfo *ji)
do {
chunk = table->chunks [chunk_pos];

while (pos < chunk->num_elements) {
while (pos < mono_atomic_load_i32 (&chunk->num_elements)) {
if (chunk->data [pos] == ji)
goto found;

Expand All @@ -750,6 +767,7 @@ jit_info_table_remove (MonoJitInfoTable *table, MonoJitInfo *ji)

/* Debugging code, should be removed. */
//jit_info_table_check (table);
mono_memory_write_barrier ();
}

void
Expand Down
18 changes: 17 additions & 1 deletion mono/mini/mini-posix.c
Expand Up @@ -878,6 +878,7 @@ mono_runtime_setup_stat_profiler (void)
static void
dump_memory_around_ip (void *ctx)
{
#if 0
#ifdef MONO_ARCH_HAVE_SIGCTX_TO_MONOCTX
if (!ctx)
return;
Expand All @@ -892,6 +893,7 @@ dump_memory_around_ip (void *ctx)
mono_runtime_printf_err ("instruction pointer is NULL, skip dumping");
}
#endif
#endif
}

static void
Expand Down Expand Up @@ -934,9 +936,13 @@ assert_printer_callback (void)
static void
dump_native_stacktrace (const char *signal, void *ctx)
{
mono_memory_barrier ();
static gint32 middle_of_crash = 0x0;
gint32 double_faulted = mono_atomic_cas_i32 ((gint32 *)&middle_of_crash, 0x1, 0x0);
mono_memory_write_barrier ();

if (!double_faulted) {
mono_runtime_printf_err ("Dumping, not in media res %d\n", double_faulted);
g_assertion_disable_global (assert_printer_callback);
} else {
mono_runtime_printf_err ("\nAn error has occured in the native fault reporting. Some diagnostic information will be unavailable.\n");
Expand All @@ -950,13 +956,18 @@ dump_native_stacktrace (const char *signal, void *ctx)
char **names;
int i, size;

#if 0
// Crashes a lot in backtrace_symbols
// FIXME: make this work using dladdr

mono_runtime_printf_err ("\nNative stacktrace:\n");

size = backtrace (array, 256);
names = backtrace_symbols (array, size);
for (i = 0; i < size; ++i) {
mono_runtime_printf_err ("\t%s", names [i]);
}
#endif

/* Try to get more meaningful information using gdb */
// FIXME: Remove locking and reenable. Can race with itself
Expand Down Expand Up @@ -1054,11 +1065,16 @@ dump_native_stacktrace (const char *signal, void *ctx)
if (!double_faulted && mono_merp_enabled ()) {
if (pid == 0) {
if (output) {
mono_runtime_printf_err ("\nBefore merp upload\n");
gboolean merp_upload_success = mono_merp_invoke (crashed_pid, signal, output, &hashes);

if (!merp_upload_success)
mono_runtime_printf_err ("\nThe MERP upload step has failed.\n");
else
else {
// Remove
mono_runtime_printf_err ("\nThe MERP upload step has succeeded.\n");
mono_summarize_timeline_phase_log (MonoSummaryDone);
}

mono_summarize_toggle_assertions (FALSE);
} else {
Expand Down

0 comments on commit 6adba61

Please sign in to comment.