Skip to content

Commit 6df6105

Browse files
committed
scheduler: use explicit memory fences
These were previously implied (on TSO platforms, such as x86) by the atomic_store to sleeping and the sleep_locks acquire before the wake_check loop, but this makes it more explicit. We might want to consider in the future if it would be better (faster) to acquire each possible lock on the sleeping path instead, so that we do each operation with seq_cst, instead of using a fence to only order the operations we care about directly.
1 parent 9c7cfa9 commit 6df6105

File tree

1 file changed

+62
-26
lines changed

1 file changed

+62
-26
lines changed

src/partr.c

Lines changed: 62 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,16 @@ static const int16_t sleeping = 1;
3131
// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it.
3232
// information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue.
3333
// information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons.
34+
// information: These observations require sequentially-consistent fences to be inserted between each of those operational phases.
35+
// [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where
36+
// * Dequeuer:
37+
// * 1a: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)`
38+
// * 1b: `multiq_check_empty` returns true
39+
// * Enqueuer:
40+
// * 2a: `multiq_insert`
41+
// * 2b: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping`
42+
// i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition.
43+
3444

3545
JULIA_DEBUG_SLEEPWAKE(
3646
uint64_t wakeup_enter;
@@ -348,16 +358,20 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
348358
}
349359

350360

351-
static void wake_thread(int16_t tid)
361+
static int wake_thread(int16_t tid)
352362
{
353363
jl_ptls_t other = jl_all_tls_states[tid];
354364
int8_t state = sleeping;
355-
jl_atomic_cmpswap(&other->sleep_check_state, &state, not_sleeping);
356-
if (state == sleeping) {
357-
uv_mutex_lock(&sleep_locks[tid]);
358-
uv_cond_signal(&wake_signals[tid]);
359-
uv_mutex_unlock(&sleep_locks[tid]);
365+
366+
if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) {
367+
if (jl_atomic_cmpswap_relaxed(&other->sleep_check_state, &state, not_sleeping)) {
368+
uv_mutex_lock(&sleep_locks[tid]);
369+
uv_cond_signal(&wake_signals[tid]);
370+
uv_mutex_unlock(&sleep_locks[tid]);
371+
return 1;
372+
}
360373
}
374+
return 0;
361375
}
362376

363377

@@ -372,37 +386,48 @@ static void wake_libuv(void)
372386
JL_DLLEXPORT void jl_wakeup_thread(int16_t tid)
373387
{
374388
jl_task_t *ct = jl_current_task;
375-
jl_ptls_t ptls = ct->ptls;
376-
jl_task_t *uvlock = jl_atomic_load(&jl_uv_mutex.owner);
377389
int16_t self = jl_atomic_load_relaxed(&ct->tid);
390+
if (tid != self)
391+
jl_fence(); // [^store_buffering_1]
392+
jl_task_t *uvlock = jl_atomic_load_relaxed(&jl_uv_mutex.owner);
378393
JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
379394
if (tid == self || tid == -1) {
380395
// we're already awake, but make sure we'll exit uv_run
396+
jl_ptls_t ptls = ct->ptls;
381397
if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping)
382-
jl_atomic_store(&ptls->sleep_check_state, not_sleeping);
398+
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping);
383399
if (uvlock == ct)
384400
uv_stop(jl_global_event_loop());
385401
}
386402
else {
387403
// something added to the sticky-queue: notify that thread
388-
wake_thread(tid);
389-
// check if we need to notify uv_run too
390-
jl_task_t *system_tid = jl_atomic_load_relaxed(&jl_all_tls_states[tid]->current_task);
391-
if (uvlock != ct && jl_atomic_load(&jl_uv_mutex.owner) == system_tid)
392-
wake_libuv();
404+
if (wake_thread(tid)) {
405+
// check if we need to notify uv_run too
406+
jl_fence();
407+
jl_task_t *tid_task = jl_atomic_load_relaxed(&jl_all_tls_states[tid]->current_task);
408+
// now that we have changed the thread to not-sleeping, ensure that
409+
// either it has not yet acquired the libuv lock, or that it will
410+
// observe the change of state to not_sleeping
411+
if (uvlock != ct && jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task)
412+
wake_libuv();
413+
}
393414
}
394415
// check if the other threads might be sleeping
395416
if (tid == -1) {
396417
// something added to the multi-queue: notify all threads
397418
// in the future, we might want to instead wake some fraction of threads,
398419
// and let each of those wake additional threads if they find work
420+
int anysleep = 0;
399421
for (tid = 0; tid < jl_n_threads; tid++) {
400422
if (tid != self)
401-
wake_thread(tid);
423+
anysleep |= wake_thread(tid);
402424
}
403425
// check if we need to notify uv_run too
404-
if (uvlock != ct && jl_atomic_load(&jl_uv_mutex.owner) != NULL)
405-
wake_libuv();
426+
if (uvlock != ct && anysleep) {
427+
jl_fence();
428+
if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) != NULL)
429+
wake_libuv();
430+
}
406431
}
407432
JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
408433
}
@@ -426,7 +451,9 @@ static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
426451
{
427452
// sleep_check_state is only transitioned from not_sleeping to sleeping
428453
// by the thread itself. As a result, if this returns false, it will
429-
// continue returning false. If it returns true, there are no guarantees.
454+
// continue returning false. If it returns true, we know the total
455+
// modification order of the fences.
456+
jl_fence(); // [^store_buffering_1]
430457
return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping;
431458
}
432459

@@ -452,18 +479,27 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
452479
jl_cpu_pause();
453480
jl_ptls_t ptls = ct->ptls;
454481
if (sleep_check_after_threshold(&start_cycles) || (!jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0)) {
455-
jl_atomic_store(&ptls->sleep_check_state, sleeping); // acquire sleep-check lock
456-
if (!multiq_check_empty()) {
482+
// acquire sleep-check lock
483+
jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping);
484+
jl_fence(); // [^store_buffering_1]
485+
if (!multiq_check_empty()) { // uses relaxed loads
486+
if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
487+
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
488+
continue;
489+
}
490+
task = get_next_task(trypoptask, q); // note: this should not yield
491+
if (ptls != ct->ptls) {
492+
// sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
493+
ptls = ct->ptls;
457494
if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
458-
jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
495+
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
496+
if (task)
497+
return task;
459498
continue;
460499
}
461-
task = get_next_task(trypoptask, q); // WARNING: this should not yield
462-
if (ptls != ct->ptls)
463-
continue; // oops, get_next_task did yield--start over
464500
if (task) {
465501
if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
466-
jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
502+
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
467503
return task;
468504
}
469505

@@ -519,7 +555,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
519555
// thread 0 is the only thread permitted to run the event loop
520556
// so it needs to stay alive
521557
if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
522-
jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
558+
jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
523559
start_cycles = 0;
524560
continue;
525561
}

0 commit comments

Comments (0)