Skip to content

Commit

Permalink
Merge branch 'better-fsa'
Browse files Browse the repository at this point in the history
  • Loading branch information
jnthn committed Apr 14, 2017
2 parents 0f18016 + 3d7b51c commit a5607e1
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 59 deletions.
124 changes: 87 additions & 37 deletions src/core/fixedsizealloc.c
Expand Up @@ -43,6 +43,15 @@ MVMFixedSizeAlloc * MVM_fixed_size_create(MVMThreadContext *tc) {
return al;
}

/* Sets up the fixed size allocator state local to this thread: one
 * (initially empty, zeroed) free list per size bin, hung off the thread
 * context. */
void MVM_fixed_size_create_thread(MVMThreadContext *tc) {
    MVMFixedSizeAllocThread *thread_fsa = MVM_malloc(sizeof(MVMFixedSizeAllocThread));
    thread_fsa->size_classes = MVM_calloc(MVM_FSA_BINS,
        sizeof(MVMFixedSizeAllocThreadSizeClass));
    tc->thread_fsa = thread_fsa;
}

/* Destroys the global fixed size allocator data structure and all of
* the memory held within it. */
void MVM_fixed_size_destroy(MVMFixedSizeAlloc *al) {
int bin_no;

Expand Down Expand Up @@ -138,6 +147,43 @@ static void * alloc_slow_path(MVMThreadContext *tc, MVMFixedSizeAlloc *al, MVMui

return result;
}
/* Allocates a piece of memory for the given size bin by taking it from the
 * global (cross-thread) free list; falls back to the slow path allocator if
 * that list is empty. Called when the per-thread free list had nothing to
 * give out. Returns a pointer to a piece of memory of the bin's size. */
static void * alloc_from_global(MVMThreadContext *tc, MVMFixedSizeAlloc *al, MVMuint32 bin) {
    /* Try and take from the global free list (fast path). */
    MVMFixedSizeAllocSizeClass *bin_ptr = &(al->size_classes[bin]);
    MVMFixedSizeAllocFreeListEntry *fle = NULL;
    if (MVM_instance_have_user_threads(tc)) {
        /* Multi-threaded; take a lock. Note that the lock is needed in
         * addition to the atomic operations: the atomics allow us to add
         * to the free list in a lock-free way, and the lock allows us to
         * avoid the ABA issue we'd have with only the atomics. */
        while (!MVM_trycas(&(al->freelist_spin), 0, 1)) {
            /* Spin-lock is held; busy-wait a little before retrying. */
            MVMint32 i = 0;
            while (i < 1024)
                i++;
        }
        /* Pop the head of the free list with CAS; retry if another thread
         * pushed a new head between our read and the swap. An empty list
         * leaves fle as NULL. */
        do {
            fle = bin_ptr->free_list;
            if (!fle)
                break;
        } while (!MVM_trycas(&(bin_ptr->free_list), fle, fle->next));
        /* Memory barrier before releasing the spin lock. */
        MVM_barrier();
        al->freelist_spin = 0;
    }
    else {
        /* Single-threaded; just take it. */
        fle = bin_ptr->free_list;
        if (fle)
            bin_ptr->free_list = fle->next;
    }
    if (fle) {
        /* Got an entry; tell Valgrind about the allocation (no-op unless
         * built with Valgrind support) and hand it back. */
        VALGRIND_MEMPOOL_ALLOC(&al->size_classes[bin], ((void *)fle),
            (bin + 1) << MVM_FSA_BIN_BITS);
        return (void *)fle;
    }

    /* Failed to take from free list; slow path with the lock. */
    return alloc_slow_path(tc, al, bin);
}
void * MVM_fixed_size_alloc(MVMThreadContext *tc, MVMFixedSizeAlloc *al, size_t bytes) {
#if FSA_SIZE_DEBUG
MVMFixedSizeAllocDebug *dbg = MVM_malloc(bytes + sizeof(MVMuint64));
Expand All @@ -146,45 +192,17 @@ void * MVM_fixed_size_alloc(MVMThreadContext *tc, MVMFixedSizeAlloc *al, size_t
#else
MVMuint32 bin = bin_for(bytes);
if (bin < MVM_FSA_BINS) {
/* Try and take from the free list (fast path). */
MVMFixedSizeAllocSizeClass *bin_ptr = &(al->size_classes[bin]);
MVMFixedSizeAllocFreeListEntry *fle;
if (MVM_instance_have_user_threads(tc)) {
/* Multi-threaded; take a lock. Note that the lock is needed in
* addition to the atomic operations: the atomics allow us to add
* to the free list in a lock-free way, and the lock allows us to
* avoid the ABA issue we'd have with only the atomics. */
while (!MVM_trycas(&(al->freelist_spin), 0, 1)) {
MVMint32 i = 0;
while (i < 1024)
i++;
}
do {
fle = bin_ptr->free_list;
if (!fle)
break;
} while (!MVM_trycas(&(bin_ptr->free_list), fle, fle->next));
MVM_barrier();
al->freelist_spin = 0;
}
else {
/* Single-threaded; just take it. */
fle = bin_ptr->free_list;
if (fle)
bin_ptr->free_list = fle->next;
}
/* Try and take from the per-thread free list. */
MVMFixedSizeAllocThreadSizeClass *bin_ptr = &(tc->thread_fsa->size_classes[bin]);
MVMFixedSizeAllocFreeListEntry *fle = bin_ptr->free_list;
if (fle) {
VALGRIND_MEMPOOL_ALLOC(&al->size_classes[bin], ((void *)fle), (bin + 1) << MVM_FSA_BIN_BITS);

bin_ptr->free_list = fle->next;
bin_ptr->items--;
return (void *)fle;
}

/* Failed to take from free list; slow path with the lock. */
return alloc_slow_path(tc, al, bin);
}
else {
return MVM_malloc(bytes);
return alloc_from_global(tc, al, bin);
}
return MVM_malloc(bytes);
#endif
}

Expand All @@ -197,8 +215,8 @@ void * MVM_fixed_size_alloc_zeroed(MVMThreadContext *tc, MVMFixedSizeAlloc *al,
}

/* Frees a piece of memory of the specified size, using the FSA. */
static void add_to_bin_freelist(MVMThreadContext *tc, MVMFixedSizeAlloc *al, MVMint32 bin, void *to_free) {
/* Came from a bin; put into free list. */
static void add_to_global_bin_freelist(MVMThreadContext *tc, MVMFixedSizeAlloc *al,
MVMint32 bin, void *to_free) {
MVMFixedSizeAllocSizeClass *bin_ptr = &(al->size_classes[bin]);
MVMFixedSizeAllocFreeListEntry *to_add = (MVMFixedSizeAllocFreeListEntry *)to_free;
MVMFixedSizeAllocFreeListEntry *orig;
Expand All @@ -219,6 +237,20 @@ static void add_to_bin_freelist(MVMThreadContext *tc, MVMFixedSizeAlloc *al, MVM
bin_ptr->free_list = to_add;
}
}
/* Returns a piece of fixed-size-allocated memory for the given bin to the
 * current thread's free list, provided that list is still below the length
 * limit; otherwise hands it back to the global free list. Bounding the
 * per-thread list prevents a "leak" under producer/consumer patterns where
 * one thread always allocates and another always frees. */
static void add_to_bin_freelist(MVMThreadContext *tc, MVMFixedSizeAlloc *al,
                                MVMint32 bin, void *to_free) {
    MVMFixedSizeAllocThreadSizeClass *bin_ptr = &(tc->thread_fsa->size_classes[bin]);
    if (bin_ptr->items < MVM_FSA_THREAD_FREELIST_LIMIT) {
        /* Push the freed memory onto the head of the thread-local list.
         * (Removed an unused local `orig` that was declared here.) */
        MVMFixedSizeAllocFreeListEntry *to_add = (MVMFixedSizeAllocFreeListEntry *)to_free;
        to_add->next = bin_ptr->free_list;
        bin_ptr->free_list = to_add;
        bin_ptr->items++;
    }
    else {
        /* Thread free list is at its limit; free to the global allocator. */
        add_to_global_bin_freelist(tc, al, bin, to_free);
    }
}
void MVM_fixed_size_free(MVMThreadContext *tc, MVMFixedSizeAlloc *al, size_t bytes, void *to_free) {
#if FSA_SIZE_DEBUG
MVMFixedSizeAllocDebug *dbg = (MVMFixedSizeAllocDebug *)((char *)to_free - 8);
Expand Down Expand Up @@ -330,3 +362,21 @@ void MVM_fixed_size_safepoint(MVMThreadContext *tc, MVMFixedSizeAlloc *al) {
}
al->free_at_next_safepoint_overflows = NULL;
}

/* Destroys per-thread fixed size allocator state. All freelists will be
 * contributed back to the global freelists for the bin size. */
void MVM_fixed_size_destroy_thread(MVMThreadContext *tc) {
    MVMFixedSizeAllocThread *thread_fsa = tc->thread_fsa;
    int bin;
    for (bin = 0; bin < MVM_FSA_BINS; bin++) {
        /* Walk this bin's thread-local free list, handing each entry back
         * to the global allocator's free list for the same bin. */
        MVMFixedSizeAllocFreeListEntry *cur = thread_fsa->size_classes[bin].free_list;
        while (cur != NULL) {
            MVMFixedSizeAllocFreeListEntry *next_entry = cur->next;
            add_to_global_bin_freelist(tc, tc->instance->fsa, bin, (void *)cur);
            cur = next_entry;
        }
    }
    MVM_free(thread_fsa->size_classes);
    MVM_free(thread_fsa);
}
24 changes: 23 additions & 1 deletion src/core/fixedsizealloc.h
@@ -1,4 +1,4 @@
/* The top-level data structure for the fixed size allocator. */
/* The global, top-level data structure for the fixed size allocator. */
struct MVMFixedSizeAlloc {
/* Size classes for the fixed size allocator. Each one represents a bunch
* of objects of the same size. The allocated sizes are rounded and then
Expand Down Expand Up @@ -54,6 +54,23 @@ struct MVMFixedSizeAllocSizeClass {
MVMFixedSizeAllocSafepointFreeListEntry *free_at_next_safepoint_list;
};

/* The per-thread data structure for the fixed size allocator, hung off the
 * thread context. Holds a free list per size bin. Allocations on the thread
 * will preferentially use the thread free list, and threads will free to
 * their own free lists, up to a length limit. On hitting the limit, they
 * will free back to the global allocator. This helps ensure patterns like
 * producer/consumer don't end up with a "leak". */
struct MVMFixedSizeAllocThread {
    /* Array of MVM_FSA_BINS per-thread size classes, one per size bin. */
    MVMFixedSizeAllocThreadSizeClass *size_classes;
};
struct MVMFixedSizeAllocThreadSizeClass {
    /* Head of the free list (singly linked; NULL when empty). */
    MVMFixedSizeAllocFreeListEntry *free_list;

    /* How many items are on this thread's free list; compared against
     * MVM_FSA_THREAD_FREELIST_LIMIT when freeing. */
    MVMuint32 items;
};

/* The number of bits we discard from the requested size when binning
* the allocation request into a size class. For example, if this is
* 3 bits then:
Expand All @@ -74,11 +91,16 @@ struct MVMFixedSizeAllocSizeClass {
/* The number of items that go into each page. */
#define MVM_FSA_PAGE_ITEMS 128

/* The length limit for the per-thread free list. */
#define MVM_FSA_THREAD_FREELIST_LIMIT 1024

/* Functions. */
MVMFixedSizeAlloc * MVM_fixed_size_create(MVMThreadContext *tc);
void MVM_fixed_size_create_thread(MVMThreadContext *tc);
void * MVM_fixed_size_alloc(MVMThreadContext *tc, MVMFixedSizeAlloc *fsa, size_t bytes);
void * MVM_fixed_size_alloc_zeroed(MVMThreadContext *tc, MVMFixedSizeAlloc *fsa, size_t bytes);
void MVM_fixed_size_destroy(MVMFixedSizeAlloc *al);
void MVM_fixed_size_destroy_thread(MVMThreadContext *tc);
void MVM_fixed_size_free(MVMThreadContext *tc, MVMFixedSizeAlloc *fsa, size_t bytes, void *free);
void MVM_fixed_size_free_at_safepoint(MVMThreadContext *tc, MVMFixedSizeAlloc *fsa, size_t bytes, void *free);
void MVM_fixed_size_safepoint(MVMThreadContext *tc, MVMFixedSizeAlloc *al);
6 changes: 6 additions & 0 deletions src/core/threadcontext.c
Expand Up @@ -44,6 +44,9 @@ MVMThreadContext * MVM_tc_create(MVMThreadContext *parent, MVMInstance *instance
/* Set up the second generation allocator. */
tc->gen2 = MVM_gc_gen2_create(instance);

/* The fixed size allocator also keeps per-thread state. */
MVM_fixed_size_create_thread(tc);

/* Allocate an initial call stack region for the thread. */
MVM_callstack_region_init(tc);

Expand Down Expand Up @@ -78,6 +81,9 @@ void MVM_tc_destroy(MVMThreadContext *tc) {
/* Destroy the second generation allocator. */
MVM_gc_gen2_destroy(tc->instance, tc->gen2);

/* Destroy the per-thread fixed size allocator state. */
MVM_fixed_size_destroy_thread(tc);

/* Destroy all callstack regions. */
MVM_callstack_region_destroy_all(tc);

Expand Down
3 changes: 3 additions & 0 deletions src/core/threadcontext.h
Expand Up @@ -44,6 +44,9 @@ struct MVMThreadContext {
/* Thread object representing the thread. */
MVMThread *thread_obj;

/* Per-thread fixed size allocator state. */
MVMFixedSizeAllocThread *thread_fsa;

/* Pointer to where the interpreter's current opcode is stored. */
MVMuint8 **interp_cur_op;

Expand Down
35 changes: 14 additions & 21 deletions src/gc/orchestrate.c
Expand Up @@ -215,19 +215,33 @@ static void finish_gc(MVMThreadContext *tc, MVMuint8 gen, MVMuint8 is_coordinato
MVM_store(&thread_obj->body.stage, MVM_thread_stage_destroyed);
}
else {
/* Free gen2 unmarked if full collection. */
if (gen == MVMGCGenerations_Both) {
GCDEBUG_LOG(tc, MVM_GC_DEBUG_ORCHESTRATE,
"Thread %d run %d : freeing gen2 of thread %d\n",
other->thread_id);
MVM_gc_collect_free_gen2_unmarked(other, 0);
}

/* Contribute this thread's promoted bytes. */
MVM_add(&tc->instance->gc_promoted_bytes_since_last_full, other->gc_promoted_bytes);

/* Collect nursery. */
GCDEBUG_LOG(tc, MVM_GC_DEBUG_ORCHESTRATE,
"Thread %d run %d : collecting nursery uncopied of thread %d\n",
other->thread_id);
MVM_gc_collect_free_nursery_uncopied(other, tc->gc_work[i].limit);

/* Handle exited threads. */
if (MVM_load(&thread_obj->body.stage) == MVM_thread_stage_exited) {
/* Don't bother freeing gen2; we'll do it next time */
MVM_store(&thread_obj->body.stage, MVM_thread_stage_clearing_nursery);
GCDEBUG_LOG(tc, MVM_GC_DEBUG_ORCHESTRATE,
"Thread %d run %d : set thread %d clearing nursery stage to %d\n",
other->thread_id, (int)MVM_load(&thread_obj->body.stage));
}

/* Mark thread free to continue. */
MVM_cas(&other->gc_status, MVMGCStatus_STOLEN, MVMGCStatus_UNABLE);
MVM_cas(&other->gc_status, MVMGCStatus_INTERRUPT, MVMGCStatus_NONE);
}
Expand Down Expand Up @@ -337,27 +351,6 @@ static void run_gc(MVMThreadContext *tc, MVMuint8 what_to_do) {

/* Wait for everybody to agree we're done. */
finish_gc(tc, gen, what_to_do == MVMGCWhatToDo_All);

/* Now we're all done, it's safe to finalize any objects that need it. */
/* XXX TODO explore the feasability of doing this in a background
* finalizer/destructor thread and letting the main thread(s) continue
* on their merry way(s). */
for (i = 0, n = tc->gc_work_count ; i < n; i++) {
MVMThreadContext *other = tc->gc_work[i].tc;

/* The thread might've been destroyed */
if (!other)
continue;

/* Contribute this thread's promoted bytes. */
MVM_add(&tc->instance->gc_promoted_bytes_since_last_full, other->gc_promoted_bytes);

/* Collect nursery. */
GCDEBUG_LOG(tc, MVM_GC_DEBUG_ORCHESTRATE,
"Thread %d run %d : collecting nursery uncopied of thread %d\n",
other->thread_id);
MVM_gc_collect_free_nursery_uncopied(other, tc->gc_work[i].limit);
}
}

/* This is called when the allocator finds it has run out of memory and wants
Expand Down
2 changes: 2 additions & 0 deletions src/types.h
Expand Up @@ -48,6 +48,8 @@ typedef struct MVMFixedSizeAlloc MVMFixedSizeAlloc;
typedef struct MVMFixedSizeAllocFreeListEntry MVMFixedSizeAllocFreeListEntry;
typedef struct MVMFixedSizeAllocSafepointFreeListEntry MVMFixedSizeAllocSafepointFreeListEntry;
typedef struct MVMFixedSizeAllocSizeClass MVMFixedSizeAllocSizeClass;
typedef struct MVMFixedSizeAllocThread MVMFixedSizeAllocThread;
typedef struct MVMFixedSizeAllocThreadSizeClass MVMFixedSizeAllocThreadSizeClass;
typedef struct MVMFrame MVMFrame;
typedef struct MVMFrameHandler MVMFrameHandler;
typedef struct MVMGen2Allocator MVMGen2Allocator;
Expand Down

0 comments on commit a5607e1

Please sign in to comment.