@@ -31,6 +31,16 @@ static const int16_t sleeping = 1;
3131// invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it.
3232// information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue.
3333// information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons.
34+ // information: These observations require sequentially-consistent fences to be inserted between each of those operational phases.
35+ // [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where
36+ // * Dequeuer:
37+ // * 1a: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)`
38+ // * 1b: `multiq_check_empty` returns true
39+ // * Enqueuer:
40+ // * 2a: `multiq_insert`
41+ // * 2b: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping`
42+ // i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition.
43+
3444
3545JULIA_DEBUG_SLEEPWAKE (
3646uint64_t wakeup_enter ;
@@ -348,16 +358,20 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
348358}
349359
350360
351- static void wake_thread (int16_t tid )
361+ static int wake_thread (int16_t tid )
352362{
353363 jl_ptls_t other = jl_all_tls_states [tid ];
354364 int8_t state = sleeping ;
355- jl_atomic_cmpswap (& other -> sleep_check_state , & state , not_sleeping );
356- if (state == sleeping ) {
357- uv_mutex_lock (& sleep_locks [tid ]);
358- uv_cond_signal (& wake_signals [tid ]);
359- uv_mutex_unlock (& sleep_locks [tid ]);
365+
366+ if (jl_atomic_load_relaxed (& other -> sleep_check_state ) == sleeping ) {
367+ if (jl_atomic_cmpswap_relaxed (& other -> sleep_check_state , & state , not_sleeping )) {
368+ uv_mutex_lock (& sleep_locks [tid ]);
369+ uv_cond_signal (& wake_signals [tid ]);
370+ uv_mutex_unlock (& sleep_locks [tid ]);
371+ return 1 ;
372+ }
360373 }
374+ return 0 ;
361375}
362376
363377
@@ -372,37 +386,48 @@ static void wake_libuv(void)
372386JL_DLLEXPORT void jl_wakeup_thread (int16_t tid )
373387{
374388 jl_task_t * ct = jl_current_task ;
375- jl_ptls_t ptls = ct -> ptls ;
376- jl_task_t * uvlock = jl_atomic_load (& jl_uv_mutex .owner );
377389 int16_t self = jl_atomic_load_relaxed (& ct -> tid );
390+ if (tid != self )
391+ jl_fence (); // [^store_buffering_1]
392+ jl_task_t * uvlock = jl_atomic_load_relaxed (& jl_uv_mutex .owner );
378393 JULIA_DEBUG_SLEEPWAKE ( wakeup_enter = cycleclock () );
379394 if (tid == self || tid == -1 ) {
380395 // we're already awake, but make sure we'll exit uv_run
396+ jl_ptls_t ptls = ct -> ptls ;
381397 if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) == sleeping )
382- jl_atomic_store (& ptls -> sleep_check_state , not_sleeping );
398+ jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping );
383399 if (uvlock == ct )
384400 uv_stop (jl_global_event_loop ());
385401 }
386402 else {
387403 // something added to the sticky-queue: notify that thread
388- wake_thread (tid );
389- // check if we need to notify uv_run too
390- jl_task_t * system_tid = jl_atomic_load_relaxed (& jl_all_tls_states [tid ]-> current_task );
391- if (uvlock != ct && jl_atomic_load (& jl_uv_mutex .owner ) == system_tid )
392- wake_libuv ();
404+ if (wake_thread (tid )) {
405+ // check if we need to notify uv_run too
406+ jl_fence ();
407+ jl_task_t * tid_task = jl_atomic_load_relaxed (& jl_all_tls_states [tid ]-> current_task );
408+ // now that we have changed the thread to not-sleeping, ensure that
409+ // either it has not yet acquired the libuv lock, or that it will
410+ // observe the change of state to not_sleeping
411+ if (uvlock != ct && jl_atomic_load_relaxed (& jl_uv_mutex .owner ) == tid_task )
412+ wake_libuv ();
413+ }
393414 }
394415 // check if the other threads might be sleeping
395416 if (tid == -1 ) {
396417 // something added to the multi-queue: notify all threads
397418 // in the future, we might want to instead wake some fraction of threads,
398419 // and let each of those wake additional threads if they find work
420+ int anysleep = 0 ;
399421 for (tid = 0 ; tid < jl_n_threads ; tid ++ ) {
400422 if (tid != self )
401- wake_thread (tid );
423+ anysleep |= wake_thread (tid );
402424 }
403425 // check if we need to notify uv_run too
404- if (uvlock != ct && jl_atomic_load (& jl_uv_mutex .owner ) != NULL )
405- wake_libuv ();
426+ if (uvlock != ct && anysleep ) {
427+ jl_fence ();
428+ if (jl_atomic_load_relaxed (& jl_uv_mutex .owner ) != NULL )
429+ wake_libuv ();
430+ }
406431 }
407432 JULIA_DEBUG_SLEEPWAKE ( wakeup_leave = cycleclock () );
408433}
@@ -426,7 +451,9 @@ static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
426451{
427452 // sleep_check_state is only transitioned from not_sleeping to sleeping
428453 // by the thread itself. As a result, if this returns false, it will
429- // continue returning false. If it returns true, there are no guarantees.
454+ // continue returning false. If it returns true, we know the total
455+ // modification order of the fences.
456+ jl_fence (); // [^store_buffering_1]
430457 return jl_atomic_load_relaxed (& ptls -> sleep_check_state ) == sleeping ;
431458}
432459
@@ -452,18 +479,27 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
452479 jl_cpu_pause ();
453480 jl_ptls_t ptls = ct -> ptls ;
454481 if (sleep_check_after_threshold (& start_cycles ) || (!jl_atomic_load_relaxed (& _threadedregion ) && ptls -> tid == 0 )) {
455- jl_atomic_store (& ptls -> sleep_check_state , sleeping ); // acquire sleep-check lock
456- if (!multiq_check_empty ()) {
482+ // acquire sleep-check lock
483+ jl_atomic_store_relaxed (& ptls -> sleep_check_state , sleeping );
484+ jl_fence (); // [^store_buffering_1]
485+ if (!multiq_check_empty ()) { // uses relaxed loads
486+ if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping )
487+ jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
488+ continue ;
489+ }
490+ task = get_next_task (trypoptask , q ); // note: this should not yield
491+ if (ptls != ct -> ptls ) {
492+ // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
493+ ptls = ct -> ptls ;
457494 if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping )
458- jl_atomic_store (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
495+ jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
496+ if (task )
497+ return task ;
459498 continue ;
460499 }
461- task = get_next_task (trypoptask , q ); // WARNING: this should not yield
462- if (ptls != ct -> ptls )
463- continue ; // oops, get_next_task did yield--start over
464500 if (task ) {
465501 if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping )
466- jl_atomic_store (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
502+ jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
467503 return task ;
468504 }
469505
@@ -519,7 +555,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
519555 // thread 0 is the only thread permitted to run the event loop
520556 // so it needs to stay alive
521557 if (jl_atomic_load_relaxed (& ptls -> sleep_check_state ) != not_sleeping )
522- jl_atomic_store (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
558+ jl_atomic_store_relaxed (& ptls -> sleep_check_state , not_sleeping ); // let other threads know they don't need to wake us
523559 start_cycles = 0 ;
524560 continue ;
525561 }