simplify handling of buffered pages (#54961)
Simplifies handling of buffered pages by keeping them in a single place (`global_page_pool_lazily_freed`) instead of making them thread-local. Performance was assessed on the serial and multithreaded GCBenchmarks, and the change was found to be performance neutral.
d-netto committed Jun 28, 2024
1 parent 8791d54 commit 334e4d9
Showing 3 changed files with 32 additions and 56 deletions.
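
Before diving into the diff: the shape of the change is that a fully-empty page discovered during sweeping is now always pushed onto the single global `global_page_pool_lazily_freed` stack, and `gc_free_pages` later decides which of those pages to keep buffered and which to actually return to the OS. The following is a minimal, single-threaded sketch of that flow, not the actual implementation: the names `page_t`, `page_stack_t`, `KEEP_BYTES`, `sweep_empty_page`, and `free_pages` are invented for illustration, whereas the real code uses the lock-free `push_lf_back`/`pop_lf_back` primitives, `jl_gc_free_page`, and `default_collect_interval` as the buffering budget.

#include <stddef.h>
#include <stdio.h>

#define PAGE_SZ    (1 << 14)        /* stand-in for GC_PAGE_SZ (16 KiB) */
#define KEEP_BYTES (64 * PAGE_SZ)   /* stand-in for default_collect_interval */

typedef struct page {
    struct page *next;              /* intrusive singly-linked stack, like jl_gc_pagemeta_t */
    int id;
} page_t;

typedef struct { page_t *top; } page_stack_t;

static page_stack_t lazily_freed;   /* the single global pool; no per-thread buffers */
static page_stack_t freed;          /* pages already returned to the OS */

static void push(page_stack_t *s, page_t *pg) { pg->next = s->top; s->top = pg; }

static page_t *pop(page_stack_t *s)
{
    page_t *pg = s->top;
    if (pg != NULL)
        s->top = pg->next;
    return pg;
}

/* sweep side: an empty page goes straight to the global lazily-freed pool */
static void sweep_empty_page(page_t *pg) { push(&lazily_freed, pg); }

/* free side: keep a bounded buffer of recently emptied pages, release the rest */
static void free_pages(void)
{
    size_t n_pages_seen = 0;
    page_stack_t tmp = {0};
    page_t *pg;
    while ((pg = pop(&lazily_freed)) != NULL) {
        n_pages_seen++;
        if (n_pages_seen * PAGE_SZ <= KEEP_BYTES) {  /* keep the last few pages around */
            push(&tmp, pg);
            continue;
        }
        push(&freed, pg);            /* the real code calls jl_gc_free_page (madvise) here */
    }
    lazily_freed = tmp;              /* safe in this single-threaded model */
}

int main(void)
{
    static page_t pages[100];
    size_t kept = 0, released = 0;
    for (int i = 0; i < 100; i++) {
        pages[i].id = i;
        sweep_empty_page(&pages[i]);
    }
    free_pages();
    for (page_t *p = lazily_freed.top; p != NULL; p = p->next) kept++;
    for (page_t *p = freed.top; p != NULL; p = p->next) released++;
    printf("buffered: %zu pages, released: %zu pages\n", kept, released);
    return 0;
}

Compared to the previous scheme, there is no per-thread `page_metadata_buffered` stack to drain or account for; the only remaining policy knob is the byte budget that bounds how many empty pages stay mapped between sweeps.
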
85 changes: 31 additions & 54 deletions src/gc.c
@@ -1293,13 +1293,7 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
// Do not pass in `ptls` as argument. This slows down the fast path
// in pool_alloc significantly
jl_ptls_t ptls = jl_current_task->ptls;
jl_gc_pagemeta_t *pg = pop_lf_back(&ptls->page_metadata_buffered);
if (pg != NULL) {
gc_alloc_map_set(pg->data, GC_PAGE_ALLOCATED);
}
else {
pg = jl_gc_alloc_page();
}
jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
pg->osize = p->osize;
pg->thread_n = ptls->tid;
set_page_metadata(pg);
@@ -1430,12 +1424,9 @@ STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
}
}

int64_t buffered_pages = 0;

// Walks over a page, reconstructing the free lists if the page contains at least one live object. If not,
// queues up the page for later decommit (i.e. through `madvise` on Unix).
static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *buffered,
jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg, int osize) JL_NOTSAFEPOINT
{
char *data = pg->data;
jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
@@ -1451,22 +1442,10 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
gc_page_serializer_init(s, pg);

int re_use_page = 1;
int keep_as_local_buffer = 0;
int freedall = 1;
int pg_skpd = 1;
if (!pg->has_marked) {
re_use_page = 0;
#ifdef _P64 // TODO: re-enable on `_P32`?
// lazy version: (empty) if the whole page was already unused, free it (return it to the pool)
// eager version: (freedall) free page as soon as possible
// the eager one uses less memory.
// FIXME - need to do accounting on a per-thread basis
// on quick sweeps, keep a few pages empty but allocated for performance
if (!current_sweep_full && buffered_pages <= default_collect_interval / GC_PAGE_SZ) {
buffered_pages++;
keep_as_local_buffer = 1;
}
#endif
nfree = (GC_PAGE_SZ - GC_PAGE_OFFSET) / osize;
gc_page_profile_write_empty_page(s, page_profile_enabled);
goto done;
@@ -1552,14 +1531,9 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
push_lf_back(allocd, pg);
}
else {
gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
if (keep_as_local_buffer) {
push_lf_back(buffered, pg);
}
else {
push_lf_back(&global_page_pool_lazily_freed, pg);
}
gc_alloc_map_set(pg->data, GC_PAGE_LAZILY_FREED);
push_lf_back(&global_page_pool_lazily_freed, pg);
}
gc_page_profile_write_to_file(s);
gc_update_page_fragmentation_data(pg);
@@ -1570,15 +1544,14 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
}

// the actual sweeping over all allocated pages in a memory pool
STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_page_stack_t *allocd, jl_gc_page_stack_t *lazily_freed,
jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_page_stack_t *allocd, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT
{
int p_n = pg->pool_n;
int t_n = pg->thread_n;
jl_ptls_t ptls2 = gc_all_tls_states[t_n];
jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
int osize = pg->osize;
gc_sweep_page(s, p, allocd, lazily_freed, pg, osize);
gc_sweep_page(s, p, allocd, pg, osize);
}

// sweep over all memory that is being used and not in a pool
@@ -1647,7 +1620,7 @@ int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_sc
push_lf_back_nosync(&tmp, pg);
}
else {
gc_sweep_pool_page(&serializer, dest, &ptls2->page_metadata_buffered, pg);
gc_sweep_pool_page(&serializer, dest, pg);
}
if (n_pages_to_scan >= n_pages_worth_parallel_sweep) {
break;
@@ -1728,7 +1701,7 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls)
if (pg == NULL) {
continue;
}
gc_sweep_pool_page(&serializer, dest, &ptls2->page_metadata_buffered, pg);
gc_sweep_pool_page(&serializer, dest, pg);
found_pg = 1;
}
if (!found_pg) {
@@ -1760,34 +1733,45 @@ void gc_sweep_pool_parallel(jl_ptls_t ptls)
// free all pages (i.e. through `madvise` on Linux) that were lazily freed
void gc_free_pages(void)
{
size_t n_pages_seen = 0;
jl_gc_page_stack_t tmp;
memset(&tmp, 0, sizeof(tmp));
while (1) {
jl_gc_pagemeta_t *pg = pop_lf_back(&global_page_pool_lazily_freed);
if (pg == NULL) {
break;
}
n_pages_seen++;
// keep the last few pages around for a while
if (n_pages_seen * GC_PAGE_SZ <= default_collect_interval) {
push_lf_back(&tmp, pg);
continue;
}
jl_gc_free_page(pg);
push_lf_back(&global_page_pool_freed, pg);
}
}

void gc_move_to_global_page_pool(jl_gc_page_stack_t *pgstack)
{
while (1) {
jl_gc_pagemeta_t *pg = pop_lf_back(pgstack);
if (pg == NULL) {
break;
// If concurrent page sweeping is disabled, then `gc_free_pages` will be called in the stop-the-world
// phase. We can guarantee, therefore, that there won't be any concurrent modifications to
// `global_page_pool_lazily_freed`, so it's safe to assign `tmp` back to `global_page_pool_lazily_freed`.
// Otherwise, we need to use the thread-safe push_lf_back/pop_lf_back functions.
if (jl_n_sweepthreads == 0) {
global_page_pool_lazily_freed = tmp;
}
else {
while (1) {
jl_gc_pagemeta_t *pg = pop_lf_back(&tmp);
if (pg == NULL) {
break;
}
push_lf_back(&global_page_pool_lazily_freed, pg);
}
jl_gc_free_page(pg);
jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, -GC_PAGE_SZ);
push_lf_back(&global_page_pool_freed, pg);
}
}

// setup the data-structures for a sweep over all memory pools
static void gc_sweep_pool(void)
{
gc_time_pool_start();
buffered_pages = 0;

// For the benefit of the analyzer, which doesn't know that gc_n_threads
// doesn't change over the course of this function
@@ -1828,12 +1812,6 @@ static void gc_sweep_pool(void)
pg->has_young = 1;
}
}
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_buffered.bottom);
while (pg != NULL) {
jl_gc_pagemeta_t *pg2 = pg->next;
buffered_pages++;
pg = pg2;
}
}

// the actual sweeping
@@ -3817,7 +3795,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
arraylist_free(&ptls2->finalizers);
if (ptls2->sweep_objs.len == 0)
arraylist_free(&ptls2->sweep_objs);
gc_move_to_global_page_pool(&ptls2->page_metadata_buffered);
}
}

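One way to read the new `gc_free_pages` policy above: pages are released back to the OS only once the cumulative size of pages seen during a sweep exceeds `default_collect_interval`, so the lazily-freed pool retains at most `default_collect_interval / GC_PAGE_SZ` pages between sweeps. The numbers below are only a back-of-the-envelope illustration: `GC_PAGE_SZ` is 16 KiB (1 << 14), but the value plugged in for `default_collect_interval` is an assumption and varies by platform and Julia version.

#include <stddef.h>
#include <stdio.h>

int main(void)
{
    const size_t page_sz = (size_t)1 << 14;                      /* GC_PAGE_SZ: 16 KiB */
    const size_t collect_interval = 3200 * 1024 * sizeof(void*); /* assumed default_collect_interval */
    /* gc_free_pages keeps a page while n_pages_seen * GC_PAGE_SZ <= default_collect_interval */
    printf("buffered pool is bounded by %zu pages (~%zu MiB kept mapped)\n",
           collect_interval / page_sz, collect_interval / (1024 * 1024));
    return 0;
}
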
2 changes: 1 addition & 1 deletion src/gc.h
@@ -153,7 +153,7 @@ typedef struct _mallocarray_t {
// pool page metadata
typedef struct _jl_gc_pagemeta_t {
// next metadata structure in per-thread list
// or in one of the `jl_gc_global_page_pool_t`
// or in one of the `jl_gc_page_stack_t`
struct _jl_gc_pagemeta_t *next;
// index of pool that owns this page
uint8_t pool_n;
1 change: 0 additions & 1 deletion src/julia_threads.h
@@ -257,7 +257,6 @@ typedef struct _jl_tls_states_t {
_Atomic(int16_t) suspend_count;
arraylist_t finalizers;
jl_gc_page_stack_t page_metadata_allocd;
jl_gc_page_stack_t page_metadata_buffered;
jl_gc_markqueue_t mark_queue;
jl_gc_mark_cache_t gc_cache;
arraylist_t sweep_objs;
