@@ -27,7 +27,7 @@ _Atomic(int) gc_n_threads_sweeping_stacks;
 // Temporary for the `ptls->gc_tls.page_metadata_allocd` used during parallel sweeping (padded to avoid false sharing)
 _Atomic(jl_gc_padded_page_stack_t *) gc_allocd_scratch;
 // `tid` of mutator thread that triggered GC
-_Atomic(int) gc_master_tid;
+_Atomic(int) gc_initiator_tid;
 // counter for sharing work when sweeping stacks
 _Atomic(int) gc_ptls_sweep_idx;
 // counter for round robin of giving back stack pages to the OS
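For context on the rename above: `gc_initiator_tid` keeps the same sentinel convention `gc_master_tid` had, holding the `tid` of the mutator that triggered GC, or -1 while no mark phase is in flight. A minimal standalone sketch of that convention, using C11 atomics and illustrative names not taken from gc.c:

    #include <stdatomic.h>
    #include <stdio.h>

    /* -1 is the sentinel for "no initiator recorded"; any other value is a tid. */
    static _Atomic(int) initiator_tid = -1;

    int main(void)
    {
        atomic_store(&initiator_tid, 3);     /* initiator publishes its tid */
        printf("initiator: %d\n", atomic_load(&initiator_tid));
        atomic_store(&initiator_tid, -1);    /* barrier resets the sentinel */
        printf("initiator: %d\n", atomic_load(&initiator_tid));
        return 0;
    }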
@@ -2514,8 +2514,7 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
     }
 }
 
-// Used in gc-debug
-void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
+void gc_collect_neighbors(jl_ptls_t ptls, jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
 {
     while (1) {
         void *new_obj = (void *)gc_ptr_queue_pop(&ptls->gc_tls.mark_queue);
@@ -2527,35 +2526,14 @@ void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
     }
 }
 
-// Drain items from worker's own chunkqueue
-void gc_drain_own_chunkqueue(jl_ptls_t ptls, jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
-{
-    jl_gc_chunk_t c = {.cid = GC_empty_chunk};
-    do {
-        c = gc_chunkqueue_pop(mq);
-        if (c.cid != GC_empty_chunk) {
-            gc_mark_chunk(ptls, mq, &c);
-            gc_mark_loop_serial_(ptls, mq);
-        }
-    } while (c.cid != GC_empty_chunk);
-}
-
-// Main mark loop. Stack (allocated on the heap) of `jl_value_t *`
-// is used to keep track of processed items. Maintaining this stack (instead of
-// native one) avoids stack overflow when marking deep objects and
-// makes it easier to implement parallel marking via work-stealing
-JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    gc_mark_loop_serial_(ptls, &ptls->gc_tls.mark_queue);
-    gc_drain_own_chunkqueue(ptls, &ptls->gc_tls.mark_queue);
-}
-
 void gc_mark_and_steal(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    int master_tid = jl_atomic_load(&gc_master_tid);
-    assert(master_tid != -1);
     jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
-    jl_gc_markqueue_t *mq_master = &gc_all_tls_states[master_tid]->gc_tls.mark_queue;
+    jl_gc_markqueue_t *mq_initiator = mq;
+    int initiator_tid = jl_atomic_load(&gc_initiator_tid);
+    if (initiator_tid != -1) {
+        mq_initiator = &gc_all_tls_states[initiator_tid]->gc_tls.mark_queue;
+    }
     void *new_obj;
     jl_gc_chunk_t c;
     pop : {
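The hunk above folds the serial mark loop into the work-stealing one. The removed `gc_mark_loop_serial` existed because the mark stack lives on the heap (avoiding native-stack overflow on deep object graphs), and the new `mq_initiator = mq` default means that when no initiator is recorded, every "steal" targets the caller's own queue, so the same loop degenerates into a serial drain. A toy model of that pop-then-steal shape, with a simplified queue type standing in for `jl_gc_markqueue_t` (illustrative only):

    #include <stdio.h>
    #include <stddef.h>

    typedef struct {
        void *items[64];
        int top;
    } toy_queue_t; /* stand-in for jl_gc_markqueue_t */

    static void *toy_pop(toy_queue_t *q)
    {
        return q->top > 0 ? q->items[--q->top] : NULL;
    }

    static void toy_mark(void *obj)
    {
        printf("marking %p\n", obj); /* trace outgoing references here */
    }

    /* When `initiator` aliases `mine`, the steal is just a local pop, so
     * one loop serves both the parallel and the no-initiator case. */
    static void toy_mark_and_steal(toy_queue_t *mine, toy_queue_t *initiator)
    {
        void *obj;
        for (;;) {
            while ((obj = toy_pop(mine)) != NULL)
                toy_mark(obj);
            if ((obj = toy_pop(initiator)) == NULL) /* "steal" */
                break;
            toy_mark(obj);
        }
    }

    int main(void)
    {
        int a, b;
        toy_queue_t q = {.items = {&a, &b}, .top = 2};
        toy_mark_and_steal(&q, &q); /* serial case: initiator aliases own queue */
        return 0;
    }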
@@ -2604,8 +2582,8 @@ void gc_mark_and_steal(jl_ptls_t ptls) JL_NOTSAFEPOINT
                 goto pop;
             }
         }
-        // Try to steal chunk from master thread
-        c = gc_chunkqueue_steal_from(mq_master);
+        // Try to steal chunk from initiator thread
+        c = gc_chunkqueue_steal_from(mq_initiator);
         if (c.cid != GC_empty_chunk) {
             gc_mark_chunk(ptls, mq, &c);
             goto pop;
@@ -2629,8 +2607,8 @@ void gc_mark_and_steal(jl_ptls_t ptls) JL_NOTSAFEPOINT
             if (new_obj != NULL)
                 goto mark;
         }
-        // Try to steal pointer from master thread
-        new_obj = gc_ptr_queue_steal_from(mq_master);
+        // Try to steal pointer from initiator thread
+        new_obj = gc_ptr_queue_steal_from(mq_initiator);
         if (new_obj != NULL)
             goto mark;
     }
@@ -2655,7 +2633,7 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  *  - No work items shall be in any thread's queues when `gc_should_mark` observes
  *    that `gc_n_threads_marking` is zero.
  *
- *  - No work item shall be stolen from the master thread (i.e. mutator thread which started
+ *  - No work item shall be stolen from the initiator thread (i.e. mutator thread which started
  *    GC and which helped the `jl_n_markthreads` - 1 threads to mark) after
  *    `gc_should_mark` observes that `gc_n_threads_marking` is zero. This property is
- *    necessary because we call `gc_mark_loop_serial` after marking the finalizer list in
+ *    necessary because we call `gc_mark_and_steal` after marking the finalizer list in
@@ -2681,7 +2659,7 @@ int gc_should_mark(void) JL_NOTSAFEPOINT
         if (n_threads_marking == 0) {
             break;
         }
-        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        int tid = jl_atomic_load_relaxed(&gc_initiator_tid);
         assert(tid != -1);
         assert(gc_all_tls_states != NULL);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
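This is the worker-side admission check: a thread joins marking only while `gc_n_threads_marking` is nonzero and the initiator's queue holds enough work to be worth sharing. A simplified single-shot sketch of that decision (the real function loops under `gc_threads_lock`, asserts the sentinel instead of branching on it, and uses a different work heuristic; the names and threshold below are assumptions):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    static _Atomic(int) n_threads_marking;
    static _Atomic(int) initiator_tid;

    static size_t count_work_in_queue(int tid)
    {
        (void)tid;
        return 0; /* stub: the real code sums ptr-queue and chunk-queue sizes */
    }

    static bool toy_should_mark(void)
    {
        if (atomic_load(&n_threads_marking) == 0)
            return false;                    /* mark phase over: don't join */
        int tid = atomic_load(&initiator_tid);
        if (tid == -1)
            return false;                    /* sentinel check, standing in
                                              * for the assert in gc.c */
        return count_work_in_queue(tid) > 1; /* join only if work to share */
    }

    int main(void)
    {
        atomic_store(&n_threads_marking, 0);
        return toy_should_mark() ? 1 : 0;
    }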
@@ -2712,10 +2690,10 @@ void gc_wake_all_for_marking(jl_ptls_t ptls) JL_NOTSAFEPOINT
     uv_mutex_unlock(&gc_threads_lock);
 }
 
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master) JL_NOTSAFEPOINT
+void gc_mark_loop(jl_ptls_t ptls, int mark_loop_initiator) JL_NOTSAFEPOINT
 {
-    if (master) {
-        jl_atomic_store(&gc_master_tid, ptls->tid);
+    if (mark_loop_initiator) {
+        jl_atomic_store(&gc_initiator_tid, ptls->tid);
         jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
@@ -2731,20 +2709,10 @@ void gc_mark_loop_parallel(jl_ptls_t ptls, int master) JL_NOTSAFEPOINT
     }
 }
 
-void gc_mark_loop(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    if (jl_n_markthreads == 0 || gc_heap_snapshot_enabled) {
-        gc_mark_loop_serial(ptls);
-    }
-    else {
-        gc_mark_loop_parallel(ptls, 1);
-    }
-}
-
 void gc_mark_loop_barrier(void) JL_NOTSAFEPOINT
 {
     assert(jl_atomic_load_relaxed(&gc_n_threads_marking) == 0);
-    jl_atomic_store_relaxed(&gc_master_tid, -1);
+    jl_atomic_store_relaxed(&gc_initiator_tid, -1);
 }
 
 void gc_mark_clean_reclaim_sets(void) JL_NOTSAFEPOINT
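Taken together, the two hunks above define the protocol: the initiator publishes its tid, counts itself into `gc_n_threads_marking`, wakes the helpers, and marks alongside them; once every thread has counted itself back out, the barrier resets the sentinel for the next cycle. A compressed sketch of that lifecycle (single-threaded stand-ins; the wake and steal machinery is elided):

    #include <assert.h>
    #include <stdatomic.h>

    static _Atomic(int) n_threads_marking;
    static _Atomic(int) initiator_tid = -1;

    static void toy_mark_and_steal(void) { /* drain + steal, as sketched above */ }

    static void toy_mark_loop(int my_tid, int is_initiator)
    {
        if (is_initiator) {
            atomic_store(&initiator_tid, my_tid);     /* publish before waking */
            atomic_fetch_add(&n_threads_marking, 1);
            /* gc_wake_all_for_marking(...) would signal helpers here */
            toy_mark_and_steal();
            atomic_fetch_add(&n_threads_marking, -1); /* count self back out */
        }
        /* helpers (is_initiator == 0) join via the should-mark check instead */
    }

    static void toy_mark_loop_barrier(void)
    {
        assert(atomic_load(&n_threads_marking) == 0);
        atomic_store(&initiator_tid, -1);             /* reset the sentinel */
    }

    int main(void)
    {
        toy_mark_loop(0, 1);
        toy_mark_loop_barrier();
        return 0;
    }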
@@ -3079,7 +3047,13 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
         gc_invoke_callbacks(jl_gc_cb_root_scanner_t,
             gc_cblist_root_scanner, (collection));
     }
-    gc_mark_loop(ptls);
+
+    if (single_threaded_mark) {
+        gc_mark_and_steal(ptls);
+    }
+    else {
+        gc_mark_loop(ptls, 1);
+    }
     gc_mark_loop_barrier();
     gc_mark_clean_reclaim_sets();
 
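`single_threaded_mark` is not defined anywhere in this hunk; judging by the removed `gc_mark_loop` wrapper, it presumably captures the same condition that used to select the serial path. A hypothetical reconstruction under that assumption:

    #include <stdbool.h>

    /* Assumption: mirrors the condition from the removed gc_mark_loop wrapper
     * (no dedicated mark threads, or a heap snapshot in progress). */
    static bool compute_single_threaded_mark(int n_markthreads,
                                             bool heap_snapshot_enabled)
    {
        return n_markthreads == 0 || heap_snapshot_enabled;
    }

    int main(void)
    {
        return compute_single_threaded_mark(0, false) ? 0 : 1;
    }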
@@ -3108,7 +3082,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
         gc_mark_finlist(mq, &finalizer_list_marked, orig_marked_len);
         // "Flush" the mark stack before flipping the reset_age bit
         // so that the objects are not incorrectly reset.
-        gc_mark_loop_serial(ptls);
+        gc_mark_and_steal(ptls);
         // Conservative marking relies on age to tell allocated objects
         // and freelist entries apart.
         mark_reset_age = !jl_gc_conservative_gc_support_enabled();
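This call replaces `gc_mark_loop_serial(ptls)`, and it runs after `gc_mark_loop_barrier` has stored -1 into `gc_initiator_tid`, so the queue-selection logic added to `gc_mark_and_steal` points the steal target back at the caller's own queue and the flush stays serial, as before. A small self-contained check of that aliasing (simplified types, illustrative names):

    #include <assert.h>
    #include <stdatomic.h>

    typedef struct { int dummy; } toy_markqueue_t;

    static toy_markqueue_t queues[4];
    static _Atomic(int) initiator_tid;

    /* Mirrors the mq_initiator selection added to gc_mark_and_steal. */
    static toy_markqueue_t *steal_target(int my_tid)
    {
        int tid = atomic_load(&initiator_tid);
        return (tid != -1) ? &queues[tid] : &queues[my_tid];
    }

    int main(void)
    {
        atomic_store(&initiator_tid, -1);       /* as after gc_mark_loop_barrier */
        assert(steal_target(2) == &queues[2]);  /* serial drain of own queue */
        return 0;
    }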
@@ -3117,7 +3091,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
         // and should not be referenced by any old objects so this won't break
         // the GC invariant.
         gc_mark_finlist(mq, &to_finalize, 0);
-        gc_mark_loop_serial(ptls);
+        gc_mark_and_steal(ptls);
         mark_reset_age = 0;
     }
 
@@ -3657,7 +3631,7 @@ void jl_parallel_gc_threadfun(void *arg)
         }
         uv_mutex_unlock(&gc_threads_lock);
         assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
-        gc_mark_loop_parallel(ptls, 0);
+        gc_mark_loop(ptls, 0);
         if (may_sweep_stack(ptls)) {
             assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
             sweep_stack_pool_loop();