create GC TLS
d-netto committed Jul 9, 2024
1 parent 2759961 commit f4267f8
Showing 10 changed files with 301 additions and 277 deletions.
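In short: this commit moves all GC-specific thread-local state out of the runtime's per-thread TLS struct and groups it behind a new `gc_tls` field of type `jl_gc_tls_states_t`, defined in the new `src/gc-tls.h`. Every call-site change below is the same mechanical rewrite, sketched here (illustrative only; the wrapper function is hypothetical, the field paths are the ones used in the diff):

    // Illustrative sketch, not code from the commit: the rewrite applied at each call site.
    void example_touch_pool(jl_ptls_t ptls, int pool_id)
    {
        // before: GC state lived directly in the per-thread struct
        // jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
        // after: the same state, grouped under the new gc_tls field
        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
        (void)p; // unused in this sketch
    }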
2 changes: 1 addition & 1 deletion src/Makefile
@@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
UV_HEADERS += uv.h
UV_HEADERS += uv/*.h
endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
ifeq ($(OS),WINNT)
PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
endif
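The new header lands in `PUBLIC_HEADERS` because `gc-tls.h` is included from `julia_threads.h` (see the new file below), which `julia.h` itself pulls in, so any embedder compiling against the installed headers needs it shipped too. A minimal embedding program that exercises that include chain (standard embedding API; the include chain is the assumption here):

    #include <julia.h> // pulls in julia_threads.h, which now pulls in gc-tls.h

    int main(void)
    {
        jl_init();         // start the Julia runtime
        jl_atexit_hook(0); // orderly shutdown
        return 0;
    }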
2 changes: 1 addition & 1 deletion src/array.c
@@ -307,7 +307,7 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
if (sz <= GC_MAX_SZCLASS) {
int pool_id = jl_gc_szclass_align8(allocsz);
-jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
+jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
int osize = jl_gc_sizeclasses[pool_id];
// We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
// the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
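For context, `jl_alloc_string` takes this pool fast path only for requests up to `GC_MAX_SZCLASS`: the size is mapped to a size class, and the class indexes the calling thread's pools, which now live under `gc_tls`. The lookup in isolation (hypothetical helper; `jl_gc_szclass_align8` and `norm_pools` are the names used above):

    static jl_gc_pool_t *pick_pool_sketch(jl_ptls_t ptls, size_t allocsz)
    {
        int pool_id = jl_gc_szclass_align8(allocsz); // size class for 8-byte-aligned sizes
        return &ptls->gc_tls.heap.norm_pools[pool_id];
    }

Requests above `GC_MAX_SZCLASS` take the non-pool path, which this hunk does not show.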
32 changes: 16 additions & 16 deletions src/gc-debug.c
@@ -99,7 +99,7 @@ static arraylist_t bits_save[4];
static void gc_clear_mark_page(jl_gc_pagemeta_t *pg, int bits)
{
jl_ptls_t ptls2 = gc_all_tls_states[pg->thread_n];
-jl_gc_pool_t *pool = &ptls2->heap.norm_pools[pg->pool_n];
+jl_gc_pool_t *pool = &ptls2->gc_tls.heap.norm_pools[pg->pool_n];
jl_taggedvalue_t *pv = (jl_taggedvalue_t*)(pg->data + GC_PAGE_OFFSET);
char *lim = (char*)pv + GC_PAGE_SZ - GC_PAGE_OFFSET - pool->osize;
while ((char*)pv <= lim) {
@@ -114,7 +114,7 @@ static void gc_clear_mark_outer(int bits)
{
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
-jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
gc_clear_mark_page(pg, bits);
pg = pg->next;
@@ -134,7 +134,7 @@ static void clear_mark(int bits)
}
bigval_t *v;
for (int i = 0; i < gc_n_threads; i++) {
-v = gc_all_tls_states[i]->heap.big_objects;
+v = gc_all_tls_states[i]->gc_tls.heap.big_objects;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
@@ -172,7 +172,7 @@ static void gc_verify_track(jl_ptls_t ptls)
return;
do {
jl_gc_markqueue_t mq;
-jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
ws_queue_t *cq = &mq.chunk_queue;
ws_queue_t *q = &mq.ptr_queue;
jl_atomic_store_relaxed(&cq->top, 0);
@@ -232,7 +232,7 @@ void gc_verify(jl_ptls_t ptls)
return;
}
jl_gc_markqueue_t mq;
-jl_gc_markqueue_t *mq2 = &ptls->mark_queue;
+jl_gc_markqueue_t *mq2 = &ptls->gc_tls.mark_queue;
ws_queue_t *cq = &mq.chunk_queue;
ws_queue_t *q = &mq.ptr_queue;
jl_atomic_store_relaxed(&cq->top, 0);
@@ -291,7 +291,7 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
int p_n = pg->pool_n;
int t_n = pg->thread_n;
jl_ptls_t ptls2 = gc_all_tls_states[t_n];
-jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n];
+jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[p_n];
int osize = pg->osize;
char *data = pg->data;
char *page_begin = data + GC_PAGE_OFFSET;
@@ -353,7 +353,7 @@ static void gc_verify_tags_pagestack(void)
{
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
-jl_gc_page_stack_t *pgstk = &ptls2->page_metadata_allocd;
+jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
while (pg != NULL) {
gc_verify_tags_page(pg);
@@ -369,7 +369,7 @@ void gc_verify_tags(void)
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
for (int i = 0; i < JL_GC_N_POOLS; i++) {
// for all pools, iterate its freelist
-jl_gc_pool_t *p = &ptls2->heap.norm_pools[i];
+jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
jl_taggedvalue_t *next = p->freelist;
jl_taggedvalue_t *last = NULL;
char *allocating = gc_page_data(next);
@@ -811,8 +811,8 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
int64_t remset_nptr = 0;
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-last_remset_len += ptls2->heap.last_remset->len;
-remset_nptr = ptls2->heap.remset_nptr;
+last_remset_len += ptls2->gc_tls.heap.last_remset->len;
+remset_nptr = ptls2->gc_tls.heap.remset_nptr;
}
jl_safe_printf("GC mark pause %.2f ms | "
"scanned %" PRId64 " kB = %" PRId64 " + %" PRId64 " | "
@@ -967,13 +967,13 @@ void gc_stats_all_pool(void)
for (int i = 0; i < JL_GC_N_POOLS; i++) {
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-size_t b = pool_stats(&ptls2->heap.norm_pools[i], &w, &np, &nol);
+size_t b = pool_stats(&ptls2->gc_tls.heap.norm_pools[i], &w, &np, &nol);
nb += b;
-no += (b / ptls2->heap.norm_pools[i].osize);
+no += (b / ptls2->gc_tls.heap.norm_pools[i].osize);
tw += w;
tp += np;
nold += nol;
-noldbytes += nol * ptls2->heap.norm_pools[i].osize;
+noldbytes += nol * ptls2->gc_tls.heap.norm_pools[i].osize;
}
}
jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
@@ -992,7 +992,7 @@ void gc_stats_big_obj(void)
size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-bigval_t *v = ptls2->heap.big_objects;
+bigval_t *v = ptls2->gc_tls.heap.big_objects;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused++;
@@ -1009,7 +1009,7 @@ void gc_stats_big_obj(void)
v = v->next;
}

-mallocarray_t *ma = ptls2->heap.mallocarrays;
+mallocarray_t *ma = ptls2->gc_tls.heap.mallocarrays;
while (ma != NULL) {
if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
nused++;
@@ -1055,7 +1055,7 @@ static void gc_count_pool_pagetable(void)
{
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
-jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->page_metadata_allocd.bottom);
+jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
while (pg != NULL) {
if (gc_alloc_map_is_set(pg->data)) {
gc_count_pool_page(pg);
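A recurring idiom in these debug helpers: each thread's allocated pages sit on a per-thread stack (`page_metadata_allocd`, now under `gc_tls`) whose `bottom` is read with a relaxed atomic load and then walked through the `next` links. As a standalone sketch (the `visit` callback is hypothetical; every other name appears in the hunks above):

    static void for_each_page_sketch(void (*visit)(jl_gc_pagemeta_t *pg))
    {
        for (int i = 0; i < gc_n_threads; i++) {
            jl_ptls_t ptls2 = gc_all_tls_states[i];
            jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&ptls2->gc_tls.page_metadata_allocd.bottom);
            while (pg != NULL) { // pages are singly linked through pg->next
                visit(pg);
                pg = pg->next;
            }
        }
    }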
18 changes: 9 additions & 9 deletions src/gc-stacks.c
@@ -167,7 +167,7 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(bufsz);
if (pool_sizes[pool_id] == bufsz) {
-small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
return;
}
}
@@ -196,7 +196,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
#ifdef _COMPILER_ASAN_ENABLED_
__asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
#endif
-small_arraylist_push(&ptls->heap.free_stacks[pool_id], stkbuf);
+small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
}
}
}
@@ -211,7 +211,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPOINT
if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
unsigned pool_id = select_pool(ssize);
ssize = pool_sizes[pool_id];
-small_arraylist_t *pool = &ptls->heap.free_stacks[pool_id];
+small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
if (pool->len > 0) {
stk = small_arraylist_pop(pool);
}
@@ -232,7 +232,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPOINT
}
*bufsz = ssize;
if (owner) {
-small_arraylist_t *live_tasks = &ptls->heap.live_tasks;
+small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks;
mtarraylist_push(live_tasks, owner);
}
return stk;
@@ -259,7 +259,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT

// free half of stacks that remain unused since last sweep
for (int p = 0; p < JL_N_STACK_POOLS; p++) {
-small_arraylist_t *al = &ptls2->heap.free_stacks[p];
+small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p];
size_t n_to_free;
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
@@ -281,10 +281,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
}
}
if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
-small_arraylist_free(ptls2->heap.free_stacks);
+small_arraylist_free(ptls2->gc_tls.heap.free_stacks);
}

-small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = 0;
size_t ndel = 0;
size_t l = live_tasks->len;
@@ -339,7 +339,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
jl_ptls_t ptls2 = allstates[i];
if (ptls2 == NULL)
continue;
-small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
l += n + (ptls2->root_task->stkbuf != NULL);
}
@@ -362,7 +362,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
goto restart;
jl_array_data(a,void*)[j++] = t;
}
-small_arraylist_t *live_tasks = &ptls2->heap.live_tasks;
+small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
size_t n = mtarraylist_length(live_tasks);
for (size_t i = 0; i < n; i++) {
jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
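The scheme these hunks touch: released task stacks of pooled sizes are parked on the owning thread's `free_stacks[pool_id]` list for reuse by later `jl_malloc_stack` calls, while `live_tasks` records tasks still holding a pooled stack; `sweep_stack_pools` periodically frees half of the buffers that went unused since the last sweep. The reuse round trip in isolation (hypothetical wrapper; the push/pop calls and field paths match the code above):

    // Park a released stack buffer, or try to pop one when allocating.
    void *stack_pool_roundtrip_sketch(jl_ptls_t ptls, unsigned pool_id, void *released)
    {
        small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
        if (released != NULL) {
            small_arraylist_push(pool, released); // release path
            return NULL;
        }
        return pool->len > 0 ? small_arraylist_pop(pool) : NULL; // allocation path
    }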
103 changes: 103 additions & 0 deletions src/gc-tls.h
@@ -0,0 +1,103 @@
// This file is a part of Julia. License is MIT: https://julialang.org/license

// Meant to be included in "julia_threads.h"
#ifndef JL_GC_TLS_H
#define JL_GC_TLS_H

#include "julia_atomics.h"
#include "work-stealing-queue.h"
#include "arraylist.h"

// GC threading ------------------------------------------------------------------

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
struct _jl_taggedvalue_t *freelist; // root of list of free objects
struct _jl_taggedvalue_t *newpages; // root of list of chunks of free objects
uint16_t osize; // size of objects in this pool
} jl_gc_pool_t;

typedef struct {
// variable for tracking weak references
small_arraylist_t weak_refs;
// live tasks started on this thread
// that are holding onto a stack from the pool
small_arraylist_t live_tasks;

// variables for tracking malloc'd arrays
struct _mallocarray_t *mallocarrays;
struct _mallocarray_t *mafreelist;

// variables for tracking big objects
struct _bigval_t *big_objects;

// lower bound of the number of pointers inside remembered values
int remset_nptr;
// remembered set
arraylist_t remset;

// variables for allocating objects from pools
#define JL_GC_N_MAX_POOLS 51 // conservative; must be kept in sync with `src/julia_internal.h`
jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];

#define JL_N_STACK_POOLS 16
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
} jl_thread_heap_t;

typedef struct {
_Atomic(int64_t) allocd;
_Atomic(int64_t) pool_live_bytes;
_Atomic(uint64_t) malloc;
_Atomic(uint64_t) realloc;
_Atomic(uint64_t) poolalloc;
_Atomic(uint64_t) bigalloc;
_Atomic(int64_t) free_acc;
_Atomic(uint64_t) alloc_acc;
} jl_thread_gc_num_t;

typedef struct {
ws_queue_t chunk_queue;
ws_queue_t ptr_queue;
arraylist_t reclaim_set;
} jl_gc_markqueue_t;

typedef struct {
// thread local increment of `perm_scanned_bytes`
size_t perm_scanned_bytes;
// thread local increment of `scanned_bytes`
size_t scanned_bytes;
// Number of queued big objects (<= 1024)
size_t nbig_obj;
// Array of queued big objects to be moved between the young list
// and the old list.
// A set low bit means that the object should be moved from the old list
// to the young list (`mark_reset_age`).
// Objects can only be put into this list when the mark bit is flipped to
// `1` (atomically). Combined with the synchronization after marking,
// this ensures that a single object can appear at most once in
// the lists (the mark bit cannot be flipped to `0` without sweeping).
void *big_obj[1024];
} jl_gc_mark_cache_t;

typedef struct {
_Atomic(struct _jl_gc_pagemeta_t *) bottom;
} jl_gc_page_stack_t;

typedef struct {
jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
jl_gc_page_stack_t page_metadata_allocd;
jl_thread_gc_num_t gc_num;
jl_gc_markqueue_t mark_queue;
jl_gc_mark_cache_t gc_cache;
_Atomic(size_t) gc_sweeps_requested;
arraylist_t sweep_objs;
} jl_gc_tls_states_t;

#ifdef __cplusplus
}
#endif

#endif // JL_GC_TLS_H
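Taken together, `gc-tls.h` gives the collector a single, self-contained view of its per-thread state. The embedding itself happens in `julia_threads.h` (changed by this commit but not shown above); judging from the `ptls->gc_tls` accesses throughout the diff, it amounts to something like this (sketch with unrelated fields elided):

    typedef struct _jl_tls_states_t {
        /* ... other runtime TLS fields ... */
        jl_gc_tls_states_t gc_tls; // all GC thread-local state, in one place
        /* ... */
    } jl_tls_states_t;

    // Typical consumer: read a per-thread allocation counter.
    static inline int64_t pool_live_bytes_sketch(jl_ptls_t ptls)
    {
        return jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes);
    }

Note that `heap` stays the first member of `jl_gc_tls_states_t`; per its own comment it is very large and its offset is baked into codegen, so keeping it up front presumably keeps that offset arithmetic stable.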