improvements on GC scheduler shutdown
d-netto committed Sep 21, 2023
1 parent 5fc5556 commit 758605b
Showing 3 changed files with 109 additions and 63 deletions.
132 changes: 97 additions & 35 deletions src/gc.c
@@ -2742,13 +2742,16 @@ JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
     gc_drain_own_chunkqueue(ptls, &ptls->mark_queue);
 }
 
-void gc_mark_and_steal(jl_ptls_t ptls)
+int gc_mark_and_steal(jl_ptls_t ptls)
 {
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid != -1)
-        mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    if (master_tid == -1) {
+        return 0;
+    }
+    mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    int marked = 0;
     void *new_obj;
     jl_gc_chunk_t c;
     pop : {
@@ -2764,6 +2767,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
         goto steal;
     }
     mark : {
+        marked = 1;
         gc_mark_outrefs(ptls, mq, new_obj, 0);
         goto pop;
     }
@@ -2792,12 +2796,10 @@ void gc_mark_and_steal(jl_ptls_t ptls)
         }
     }
     // Try to steal chunk from master thread
-    if (mq_master != NULL) {
-        c = gc_chunkqueue_steal_from(mq_master);
-        if (c.cid != GC_empty_chunk) {
-            gc_mark_chunk(ptls, mq, &c);
-            goto pop;
-        }
+    c = gc_chunkqueue_steal_from(mq_master);
+    if (c.cid != GC_empty_chunk) {
+        gc_mark_chunk(ptls, mq, &c);
+        goto pop;
     }
     // Try to steal pointer from random GC thread
     for (int i = 0; i < 4 * jl_n_markthreads; i++) {
@@ -2814,37 +2816,98 @@ void gc_mark_and_steal(jl_ptls_t ptls)
         if (new_obj != NULL)
             goto mark;
     }
     // Try to steal pointer from master thread
-    if (mq_master != NULL) {
-        new_obj = gc_ptr_queue_steal_from(mq_master);
-        if (new_obj != NULL)
-            goto mark;
-    }
+    new_obj = gc_ptr_queue_steal_from(mq_master);
+    if (new_obj != NULL)
+        goto mark;
     }
+    return marked;
 }
 
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
+#define GC_BACKOFF_MIN_LG2 3  // spin counts range over [2^3, 2^11] pauses
+#define GC_BACKOFF_MAX_LG2 11
+
+STATIC_INLINE void gc_sched_yield_reset_state(gc_sched_state_t *s) JL_NOTSAFEPOINT
 {
-    int backoff = GC_BACKOFF_MIN;
-    if (master) {
-        jl_atomic_store(&gc_master_tid, ptls->tid);
-        // Wake threads up and try to do some work
-        uv_mutex_lock(&gc_threads_lock);
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        uv_cond_broadcast(&gc_threads_cond);
-        uv_mutex_unlock(&gc_threads_lock);
-        gc_mark_and_steal(ptls);
-        jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-    }
+    s->yield_phase = GC_SPINNING;
+    s->backoff_lg2 = GC_BACKOFF_MIN_LG2;
+    s->n_spins_at_max = 0;
+}
+
+STATIC_INLINE void gc_sched_yield(gc_sched_state_t *s) JL_NOTSAFEPOINT
+{
+    if (s->yield_phase == GC_SPINNING) {
+        // spin for 2^backoff_lg2 iterations
+        for (int i = 0; i < (1 << s->backoff_lg2); i++) {
+            jl_cpu_pause();
+        }
+        if (s->backoff_lg2 == GC_BACKOFF_MAX_LG2) {
+            s->n_spins_at_max++;
+            // has been spinning for a while... should
+            // just sleep in the next failed steal attempt
+            if (s->n_spins_at_max >= 4) {
+                s->yield_phase = GC_SLEEPING;
+            }
+        }
+        else {
+            s->backoff_lg2++;
+        }
+    }
+    else {
+        // sleep for 1ms
+        uv_sleep(1);
+    }
+}
+
+void gc_mark_loop_master_init(jl_ptls_t ptls)
+{
+    jl_atomic_store(&gc_master_tid, ptls->tid);
+    // Wake threads up and try to do some work
+    uv_mutex_lock(&gc_threads_lock);
+    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+    uv_cond_broadcast(&gc_threads_cond);
+    uv_mutex_unlock(&gc_threads_lock);
+    gc_mark_and_steal(ptls);
+    jl_atomic_fetch_add(&gc_n_threads_marking, -1);
+}
+
+void gc_mark_loop_parallel(jl_ptls_t ptls)
+{
+    gc_sched_state_t s;
+    gc_sched_yield_reset_state(&s);
     while (jl_atomic_load(&gc_n_threads_marking) > 0) {
         // Try to become a thief while other threads are marking
         jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        if (jl_atomic_load(&gc_master_tid) != -1) {
-            gc_mark_and_steal(ptls);
-        }
+        int marked = gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-        // Failed to steal
-        gc_backoff(&backoff);
+        if (marked) {
+            gc_sched_yield_reset_state(&s);
+        }
+        else {
+            gc_sched_yield(&s);
+        }
     }
 }
 
+void gc_mark_loop_master(jl_ptls_t ptls)
+{
+    gc_mark_loop_master_init(ptls);
+    gc_mark_loop_parallel(ptls);
+}
+
+STATIC_INLINE int gc_may_mark(void) JL_NOTSAFEPOINT
+{
+    return jl_atomic_load(&gc_n_threads_marking) > 0;
+}
+
+void gc_mark_loop_worker(jl_ptls_t ptls)
+{
+    while (1) {
+        uv_mutex_lock(&gc_threads_lock);
+        while (!gc_may_mark()) {
+            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
+        }
+        uv_mutex_unlock(&gc_threads_lock);
+        gc_mark_loop_parallel(ptls);
+    }
+}

@@ -2854,16 +2917,15 @@ void gc_mark_loop(jl_ptls_t ptls)
         gc_mark_loop_serial(ptls);
     }
     else {
-        gc_mark_loop_parallel(ptls, 1);
+        gc_mark_loop_master(ptls);
    }
 }
 
 void gc_mark_loop_barrier(void)
 {
     jl_atomic_store(&gc_master_tid, -1);
-    while (jl_atomic_load(&gc_n_threads_marking) != 0) {
-        jl_cpu_pause();
-    }
+    while (jl_atomic_load(&gc_n_threads_marking) != 0)
+        ;
 }
 
 void gc_mark_clean_reclaim_sets(void)
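Note on the new shutdown handshake in gc.c: gc_mark_loop_barrier() retracts gc_master_tid and then waits for gc_n_threads_marking to drain, while gc_mark_and_steal() now returns 0 as soon as it observes no master. The following is a minimal standalone model of that protocol, a sketch only: master_tid, n_marking, and thief() are names invented here rather than taken from the Julia sources, the marking work itself is elided, and sched_yield() stands in for the spin/sleep policy.

// Standalone model of the shutdown handshake (assumed names throughout);
// compile with: cc -std=c11 -pthread handshake.c
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int master_tid = -1; // stands in for gc_master_tid
static atomic_int n_marking = 0;   // stands in for gc_n_threads_marking

// Thief loop, collapsed to the shutdown-relevant part: once master_tid
// is -1, the stand-in for gc_mark_and_steal() "returns 0" immediately,
// so the thief stops finding work and eventually sees the count at zero.
static void *thief(void *arg)
{
    (void)arg;
    while (atomic_load(&n_marking) > 0) {
        atomic_fetch_add(&n_marking, 1);
        int marked = atomic_load(&master_tid) != -1; // the new early exit
        atomic_fetch_add(&n_marking, -1);
        if (!marked)
            sched_yield(); // the real code spins/sleeps via gc_sched_yield()
    }
    return NULL;
}

int main(void)
{
    // master enters marking: publish its tid, bump the marking count
    atomic_store(&master_tid, 0);
    atomic_fetch_add(&n_marking, 1);

    pthread_t t[4];
    for (int i = 0; i < 4; i++)
        pthread_create(&t[i], NULL, thief, NULL);

    // barrier, shaped like gc_mark_loop_barrier(): retract the master
    // tid, then wait for the marking count to drain to zero
    atomic_fetch_add(&n_marking, -1);
    atomic_store(&master_tid, -1);
    while (atomic_load(&n_marking) != 0)
        ;

    for (int i = 0; i < 4; i++)
        pthread_join(t[i], NULL);
    puts("all thieves drained");
    return 0;
}

Retracting the master tid before the drain loop is what gives the barrier forward progress: a thief that re-enters the count afterwards fails its steal immediately and leaves the count again.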
25 changes: 11 additions & 14 deletions src/gc.h
@@ -114,6 +114,16 @@ typedef struct _jl_gc_chunk_t {
 #define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *`
 #define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue
 
+// State used for GC scheduling
+typedef struct {
+#define GC_SPINNING 0
+#define GC_SLEEPING 1
+    uint8_t yield_phase;   // whether the thread is spinning or sleeping
+                           // between failed steal attempts
+    size_t backoff_lg2;    // exponential backoff log2 counter
+    size_t n_spins_at_max; // number of times it has spun at the maximum backoff
+} gc_sched_state_t;
+
 // layout for big (>2k) objects
 
 JL_EXTENSION typedef struct _bigval_t {
@@ -190,19 +200,6 @@ extern jl_gc_global_page_pool_t global_page_pool_lazily_freed;
 extern jl_gc_global_page_pool_t global_page_pool_clean;
 extern jl_gc_global_page_pool_t global_page_pool_freed;
 
-#define GC_BACKOFF_MIN 4
-#define GC_BACKOFF_MAX 12
-
-STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT
-{
-    if (*i < GC_BACKOFF_MAX) {
-        (*i)++;
-    }
-    for (int j = 0; j < (1 << *i); j++) {
-        jl_cpu_pause();
-    }
-}
-
 // Lock-free stack implementation taken
 // from Herlihy's "The Art of Multiprocessor Programming"
 // XXX: this is not a general-purpose lock-free stack. We can
@@ -460,7 +457,7 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
 void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
 void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
 void gc_mark_loop_serial(jl_ptls_t ptls);
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
+void gc_mark_loop_worker(jl_ptls_t ptls);
 void sweep_stack_pools(void);
 void jl_gc_debug_init(void);
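For intuition about the gc_sched_state_t struct added above, here is a single-threaded trace of the policy it drives. reset_state() and yield_once() mirror the shape of gc_sched_yield_reset_state() and gc_sched_yield() in gc.c, but the stand-in names, the exponent range 3..11, and the printf calls replacing jl_cpu_pause()/uv_sleep() are all assumptions of this sketch.

// Single-threaded trace of the spin-then-sleep backoff (assumed constants).
#include <stdint.h>
#include <stdio.h>

#define GC_SPINNING 0
#define GC_SLEEPING 1
#define BACKOFF_MIN_LG2 3  // assumed: first failed steal spins 2^3 pauses
#define BACKOFF_MAX_LG2 11 // assumed: spins are capped at 2^11 pauses

typedef struct {
    uint8_t yield_phase;
    size_t backoff_lg2;
    size_t n_spins_at_max;
} sched_state_t;

static void reset_state(sched_state_t *s)
{
    s->yield_phase = GC_SPINNING;
    s->backoff_lg2 = BACKOFF_MIN_LG2;
    s->n_spins_at_max = 0;
}

// One failed steal attempt: double the spin length until the cap, then,
// after four consecutive spins at the cap, fall back to sleeping.
static void yield_once(sched_state_t *s)
{
    if (s->yield_phase == GC_SPINNING) {
        printf("spin %4zu pauses\n", (size_t)1 << s->backoff_lg2);
        if (s->backoff_lg2 == BACKOFF_MAX_LG2) {
            if (++s->n_spins_at_max >= 4)
                s->yield_phase = GC_SLEEPING;
        }
        else {
            s->backoff_lg2++;
        }
    }
    else {
        printf("sleep 1 ms\n");
    }
}

int main(void)
{
    sched_state_t s;
    reset_state(&s);
    for (int fails = 0; fails < 15; fails++)
        yield_once(&s); // a run of failed steals escalates spin -> sleep
    reset_state(&s);    // one successful steal snaps back to short spins
    yield_once(&s);
    return 0;
}

The reset on every successful steal keeps a well-fed thief in the cheap spinning phase, while a thief that keeps finding nothing ends up parked in 1 ms sleeps and stops churning the scheduler state.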
15 changes: 1 addition & 14 deletions src/partr.c
@@ -107,12 +107,6 @@ void jl_init_threadinginfra(void)
 
 void JL_NORETURN jl_finish_task(jl_task_t *t);
 
-
-static inline int may_mark(void) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&gc_n_threads_marking) > 0);
-}
-
 // gc thread mark function
 void jl_gc_mark_threadfun(void *arg)
 {
@@ -128,14 +122,7 @@ void jl_gc_mark_threadfun(void *arg)
     // free the thread argument here
     free(targ);
 
-    while (1) {
-        uv_mutex_lock(&gc_threads_lock);
-        while (!may_mark()) {
-            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
-        }
-        uv_mutex_unlock(&gc_threads_lock);
-        gc_mark_loop_parallel(ptls, 0);
-    }
+    gc_mark_loop_worker(ptls);
 }
 
 // gc thread sweep function
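The worker wake-up that moved from partr.c into gc_mark_loop_worker() is the standard predicate-guarded condition wait. Below is a generic pthreads rendition of the same shape; the names and the plain-int predicate are assumptions of this sketch, standing in for the runtime's uv_mutex_t/uv_cond_t pair and the atomic gc_n_threads_marking.

// Predicate-guarded condition wait (assumed names; pthreads in place of
// the uv_mutex_t/uv_cond_t pair used by the runtime).
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t threads_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t threads_cond = PTHREAD_COND_INITIALIZER;
static int may_mark_flag = 0; // predicate, guarded by threads_lock here

// Worker: re-check the predicate in a loop, so that a spurious wakeup
// (or a wakeup for a cycle that already finished) goes back to sleep.
static void *worker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&threads_lock);
    while (!may_mark_flag)
        pthread_cond_wait(&threads_cond, &threads_lock);
    pthread_mutex_unlock(&threads_lock);
    puts("worker awake"); // ... steal and mark, as gc_mark_loop_parallel() does
    return NULL;
}

// Master: flip the predicate and broadcast under the same lock, so no
// worker can test the predicate and then sleep through the broadcast.
static void wake_workers(void)
{
    pthread_mutex_lock(&threads_lock);
    may_mark_flag = 1;
    pthread_cond_broadcast(&threads_cond);
    pthread_mutex_unlock(&threads_lock);
}

int main(void)
{
    pthread_t w[2];
    for (int i = 0; i < 2; i++)
        pthread_create(&w[i], NULL, worker, NULL);
    wake_workers();
    for (int i = 0; i < 2; i++)
        pthread_join(w[i], NULL);
    return 0;
}

In the runtime the predicate is the atomic gc_n_threads_marking rather than a mutex-guarded flag, but gc_mark_loop_master_init() still issues the broadcast while holding gc_threads_lock, which rules out the lost-wakeup interleaving.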
