Skip to content

Commit

Permalink
Allocate some frame environments on the callstack
Browse files Browse the repository at this point in the history
Only do this for frames that live on the callstack rather than on the
heap. This is the case when we have a lexical environment but it is
never captured or the frame doesn't escape in other ways. When frames do
escape, we have to also move the environment out of the callstack and
onto the heap. We already go to some effort to allocate on the heap
in the first place when a frame tends to get promoted there anyway, so
the number of times this movement will be required is limited. This is
mostly of benefit to Raku rather than NQP code, since in NQP we can
quite often totally eliminate lexicals, whereas with Raku that isn't
always possible.
  • Loading branch information
jnthn committed Oct 26, 2021
1 parent d83edbe commit 7eca0ed
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 85 deletions.
84 changes: 72 additions & 12 deletions src/core/callstack.c
Expand Up @@ -104,13 +104,18 @@ size_t record_size(MVMCallStackRecord *record) {
return sizeof(MVMCallStackRegionStart);
case MVM_CALLSTACK_RECORD_FRAME:
return sizeof(MVMCallStackFrame) +
to_8_bytes(((MVMCallStackFrame *)record)->frame.allocd_work);
to_8_bytes(((MVMCallStackFrame *)record)->frame.allocd_work +
((MVMCallStackFrame *)record)->frame.allocd_env);
case MVM_CALLSTACK_RECORD_HEAP_FRAME:
return sizeof(MVMCallStackHeapFrame) +
to_8_bytes(((MVMCallStackHeapFrame *)record)->frame->allocd_work);
case MVM_CALLSTACK_RECORD_PROMOTED_FRAME:
/* Look at memory from dead (pre-promotion) environment size, as
* we won't grow that on the callstack if we've moved it to the
* heap. */
return sizeof(MVMCallStackPromotedFrame) +
to_8_bytes(((MVMCallStackHeapFrame *)record)->frame->allocd_work);
to_8_bytes(((MVMCallStackPromotedFrame *)record)->frame->allocd_work +
((MVMCallStackPromotedFrame *)record)->dead.allocd_env);
case MVM_CALLSTACK_RECORD_CONTINUATION_TAG:
return sizeof(MVMCallStackContinuationTag);
case MVM_CALLSTACK_RECORD_DISPATCH_RECORD:
Expand Down Expand Up @@ -153,13 +158,17 @@ MVMCallStackRecord * MVM_callstack_allocate_nested_runloop(MVMThreadContext *tc)
}

/* Allocates a bytecode frame record on the callstack. */
MVMCallStackFrame * MVM_callstack_allocate_frame(MVMThreadContext *tc, MVMuint16 work_size) {
MVMCallStackFrame * MVM_callstack_allocate_frame(MVMThreadContext *tc, MVMuint16 work_size,
MVMuint16 env_size) {
/* Allocate frame with space for registers initialized. */
tc->stack_top = allocate_record(tc, MVM_CALLSTACK_RECORD_FRAME,
sizeof(MVMCallStackFrame) + to_8_bytes(work_size));
sizeof(MVMCallStackFrame) + to_8_bytes(work_size + env_size));
MVMCallStackFrame *allocated = (MVMCallStackFrame *)tc->stack_top;
allocated->frame.work = (MVMRegister *)((char *)allocated + sizeof(MVMCallStackFrame));
allocated->frame.env = (MVMRegister *)((char *)allocated + sizeof(MVMCallStackFrame)
+ work_size);
allocated->frame.allocd_work = work_size;
allocated->frame.allocd_env = env_size;

/* Ensure collectable header flags and owner are zeroed, which means we'll
* never try to mark or root the frame. */
Expand Down Expand Up @@ -191,20 +200,71 @@ MVMCallStackHeapFrame * MVM_callstack_allocate_heap_frame(MVMThreadContext *tc,

/* Sees if we can grow the work space (extra registers) and the lexical
 * environment of the frame currently on the top of the callstack, for the
 * purposes of OSR. Returns non-zero if the space was secured, and zero if
 * there is not enough room left in the current callstack region to do so
 * (in which case nothing is changed). Sizes only ever grow; passing a
 * needed size smaller than the current allocation is a no-op for that
 * area. */
MVMint32 MVM_callstack_ensure_work_and_env_space(MVMThreadContext *tc, MVMuint16 needed_work,
        MVMuint16 needed_env) {
    /* Call this to ensure we really do have a frame on the top of the stack,
     * rather than just reading tc->cur_frame. */
    MVMFrame *cur_frame = MVM_callstack_current_frame(tc);

    /* Calculate the new work and environment sizes, ensuring we only ever
     * grow them. */
    MVMuint16 new_work_size = needed_work > cur_frame->allocd_work
        ? needed_work
        : cur_frame->allocd_work;
    MVMuint16 new_env_size = needed_env > cur_frame->allocd_env
        ? needed_env
        : cur_frame->allocd_env;

    /* How we grow them depends on whether it's a callstack frame (and so the
     * environment lives on the callstack) or a heap one. */
    MVMCallStackRegion *region = tc->stack_current_region;
    if (MVM_FRAME_IS_ON_CALLSTACK(tc, cur_frame)) {
        /* Work out how much extra space we need for work and environment
         * together; bail if we don't have that much. */
        MVMuint16 have = cur_frame->allocd_work + cur_frame->allocd_env;
        MVMuint16 need = new_work_size + new_env_size;
        MVMuint16 diff = to_8_bytes(need - have);
        if (region->alloc_limit - region->alloc < diff)
            return 0;

        /* Allocate the extra space on the callstack. */
        region->alloc += diff;

        /* Move the environment to its new location on the callstack, after
         * the (possibly grown) work area. The old and new locations can
         * overlap, so memmove is required rather than memcpy. */
        MVMRegister *new_env = (MVMRegister *)(((char *)cur_frame) + sizeof(MVMFrame)
                + new_work_size);
        memmove(new_env, cur_frame->env, cur_frame->allocd_env);
        cur_frame->env = new_env;
    }
    else {
        /* Heap frame: only the work area lives on the callstack. Work out
         * how much extra space we need for it, if any; bail if the region
         * cannot supply it. */
        MVMuint16 have = cur_frame->allocd_work;
        MVMuint16 need = new_work_size;
        MVMuint16 diff = to_8_bytes(need - have);
        if (region->alloc_limit - region->alloc < diff)
            return 0;

        /* Allocate the extra space on the callstack. */
        region->alloc += diff;

        /* If the environment size changed, then need to realloc using the
         * FSA (the environment of a heap frame lives in FSA-managed
         * memory). */
        if (new_env_size > cur_frame->allocd_env) {
            MVMRegister *new_env = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa,
                    new_env_size);
            if (cur_frame->allocd_env) {
                memcpy(new_env, cur_frame->env, cur_frame->allocd_env);
                MVM_fixed_size_free(tc, tc->instance->fsa, cur_frame->allocd_env,
                        cur_frame->env);
            }
            cur_frame->env = new_env;
        }
    }

    /* Record the new sizes on the frame. */
    cur_frame->allocd_work = new_work_size;
    cur_frame->allocd_env = new_env_size;

    return 1;
}

Expand Down
17 changes: 12 additions & 5 deletions src/core/callstack.h
Expand Up @@ -69,7 +69,10 @@ struct MVMCallStackRegionStart {
};

/* A bytecode frame, the MVMFrame being allocated inline on the callstack.
* It is followed by space for the work area (registers). */
* It is followed by space for the work area (registers) and the lexical
* environment (also registers). The work area lives on the callstack no
* matter if the frame ends up heap promoted; the environment will be
* copied into a heap location for safety reasons upon promotion. */
#define MVM_CALLSTACK_RECORD_FRAME 2
struct MVMCallStackFrame {
/* Commonalities of all records. */
Expand All @@ -80,7 +83,8 @@ struct MVMCallStackFrame {
};

/* A bytecode frame where the MVMFrame was allocated directly on the heap.
* It is followed by space for the work area (registers). */
* It is followed by space for the work area (registers). Unlike a frame on
* the stack, it is not followed by an environment. */
#define MVM_CALLSTACK_RECORD_HEAP_FRAME 3
struct MVMCallStackHeapFrame {
/* Commonalities of all records. */
Expand All @@ -92,7 +96,8 @@ struct MVMCallStackHeapFrame {

/* A bytecode frame where the MVMFrame was allocated inline on the callstack,
* but later promoted to the heap. The work registers still live directly
* after it. */
* after it; the space for the environment remains allocated, but is not used
* (it's evacuated to the heap). */
#define MVM_CALLSTACK_RECORD_PROMOTED_FRAME 4
struct MVMCallStackPromotedFrame {
/* Commonalities of all records. */
Expand Down Expand Up @@ -341,10 +346,12 @@ struct MVMCallStackNestedRunloop {
/* Functions for working with the call stack. */
void MVM_callstack_init(MVMThreadContext *tc);
MVMCallStackRecord * MVM_callstack_allocate_nested_runloop(MVMThreadContext *tc);
MVMCallStackFrame * MVM_callstack_allocate_frame(MVMThreadContext *tc, MVMuint16 work_size);
MVMCallStackFrame * MVM_callstack_allocate_frame(MVMThreadContext *tc, MVMuint16 work_size,
MVMuint16 env_size);
MVMCallStackHeapFrame * MVM_callstack_allocate_heap_frame(MVMThreadContext *tc,
MVMuint16 work_size);
MVMint32 MVM_callstack_ensure_work_space(MVMThreadContext *tc, MVMuint16 needed_size);
MVMint32 MVM_callstack_ensure_work_and_env_space(MVMThreadContext *tc, MVMuint16 needed_work,
MVMuint16 needed_env);
MVMCallStackDispatchRecord * MVM_callstack_allocate_dispatch_record(MVMThreadContext *tc);
MVMCallStackDispatchRun * MVM_callstack_allocate_dispatch_run(MVMThreadContext *tc,
MVMuint32 num_temps);
Expand Down
88 changes: 45 additions & 43 deletions src/core/frame.c
Expand Up @@ -130,7 +130,7 @@ static void instrumentation_level_barrier(MVMThreadContext *tc, MVMStaticFrame *
void MVM_frame_destroy(MVMThreadContext *tc, MVMFrame *frame) {
if (frame->work)
MVM_args_proc_cleanup(tc, &frame->params);
if (frame->env)
if (frame->env && !MVM_FRAME_IS_ON_CALLSTACK(tc, frame))
MVM_fixed_size_free(tc, tc->instance->fsa, frame->allocd_env, frame->env);
if (frame->extra) {
MVMFrameExtra *e = frame->extra;
Expand Down Expand Up @@ -255,40 +255,43 @@ static MVMFrame * autoclose(MVMThreadContext *tc, MVMStaticFrame *needed) {
static MVMFrame * allocate_frame(MVMThreadContext *tc, MVMStaticFrame *static_frame,
MVMSpeshCandidate *spesh_cand, MVMint32 heap) {
MVMFrame *frame;
MVMint32 env_size, num_locals;
MVMint32 num_locals;
MVMStaticFrameBody *static_frame_body;
MVMJitCode *jitcode;

MVMint32 work_size = spesh_cand ? spesh_cand->body.work_size : static_frame->body.work_size;
MVMint32 env_size = spesh_cand ? spesh_cand->body.env_size : static_frame->body.env_size;
if (heap) {
/* Allocate frame on the heap. */
/* Allocate frame on the heap. The callstack record includes space
* for the work registers and ->work will have been set up already. */
MVMROOT2(tc, static_frame, spesh_cand, {
if (tc->cur_frame)
MVM_frame_force_to_heap(tc, tc->cur_frame);
frame = MVM_callstack_allocate_heap_frame(tc, work_size)->frame;
});

/* If we have an environment, that needs allocating separately for
* heap-based frames. */
if (env_size) {
frame->env = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa, env_size);
frame->allocd_env = env_size;
}
}
else {
/* Allocate the frame on the call stack. */
MVMCallStackFrame *record = MVM_callstack_allocate_frame(tc, work_size);
/* Allocate the frame on the call stack. The callstack record includes
* space for both the work registers and the environment, and both the
* ->work and ->env pointers will have been set up already, but we do
* need to clear the environment. */
MVMCallStackFrame *record = MVM_callstack_allocate_frame(tc, work_size, env_size);
frame = &(record->frame);
memset(frame->env, 0, env_size);
}

/* Allocate space for lexicals and work area. */
/* Set up work area. */
static_frame_body = &(static_frame->body);
env_size = spesh_cand ? spesh_cand->body.env_size : static_frame_body->env_size;

jitcode = spesh_cand ? spesh_cand->body.jitcode : NULL;
num_locals = jitcode && jitcode->local_types ? jitcode->num_locals :
(spesh_cand ? spesh_cand->body.num_locals : static_frame_body->num_locals);
if (env_size) {
frame->env = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa, env_size);
frame->allocd_env = env_size;
}
else {
frame->env = NULL;
frame->allocd_env = 0;
}
if (work_size) {
if (spesh_cand) {
/* Zero frame memory. Spesh makes sure we have VMNull setup in
Expand Down Expand Up @@ -322,18 +325,7 @@ void MVM_frame_setup_deopt(MVMThreadContext *tc, MVMFrame *frame, MVMStaticFrame
frame->outer = code_ref->body.outer;
frame->spesh_cand = NULL;
frame->spesh_correlation_id = 0;

/* Allocate space for lexicals and work area. */
MVMStaticFrameBody *static_frame_body = &(static_frame->body);
MVMint32 env_size = static_frame_body->env_size;
if (env_size) {
frame->env = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa, env_size);
frame->allocd_env = env_size;
}
else {
frame->env = NULL;
}
frame->args = frame->work + static_frame_body->num_locals;
frame->args = frame->work + static_frame->body.num_locals;
}

/* Sets up storage for state variables. We do this after tc->cur_frame became
Expand Down Expand Up @@ -634,6 +626,28 @@ MVMFrame * MVM_frame_move_to_heap(MVMThreadContext *tc, MVMFrame *frame) {
MVMCallStackFrame *unpromoted_record = (MVMCallStackFrame *)record;
cur_to_promote = &(unpromoted_record->frame);

/* Move any lexical environment to the heap, as it may now
* out-live the callstack entry. */
MVMuint16 env_size = cur_to_promote->allocd_env;
if (env_size) {
MVMRegister *heap_env = MVM_fixed_size_alloc(tc,
tc->instance->fsa, env_size);
memcpy(heap_env, cur_to_promote->env, env_size);
cur_to_promote->env = heap_env;
}
else {
/* Stack frames may set up the env pointer even if it's to
* an empty area (avoids branches); ensure it is nulled out
* so we don't try to do a bogus free later. */
cur_to_promote->env = NULL;
}

/* Clear any dynamic lexical cache entry, as it may point into an
* environment that gets moved to the heap. */
MVMFrameExtra *e = cur_to_promote->extra;
if (e)
e->dynlex_cache_name = NULL;

/* Allocate a heap frame. */
/* frame is safe from the GC as we wouldn't be here if it wasn't on the stack */
MVMFrame *promoted = MVM_gc_allocate_frame(tc);
Expand Down Expand Up @@ -859,21 +873,9 @@ static MVMuint64 remove_one_frame(MVMThreadContext *tc, MVMuint8 unwind) {
need_caller = 0;
}

/* Clean up frame working space. */
/* Clean up any allocations for argument working area. */
MVM_args_proc_cleanup(tc, &returner->params);

/* If it's a call stack frame, its environment wasn't closed over, so it
* can go away immediately. */
MVMuint32 clear_caller;
if (MVM_FRAME_IS_ON_CALLSTACK(tc, returner)) {
if (returner->env)
MVM_fixed_size_free(tc, tc->instance->fsa, returner->allocd_env, returner->env);
clear_caller = 0;
}
else {
clear_caller = !need_caller;
}

/* NULL out ->work, to indicate the frame is no longer in dynamic scope.
* This is used by the GC to avoid marking stuff (this is needed for
* safety as otherwise we'd read freed memory), as well as by exceptions to
Expand All @@ -894,9 +896,9 @@ static MVMuint64 remove_one_frame(MVMThreadContext *tc, MVMuint8 unwind) {
MVMROOT(tc, returner, {
caller = MVM_callstack_unwind_frame(tc, unwind, &thunked);
});
if (!need_caller)
returner->caller = NULL;
}
if (clear_caller)
returner->caller = NULL;
if (thunked)
return 1;

Expand Down
3 changes: 2 additions & 1 deletion src/spesh/deopt.c
Expand Up @@ -95,7 +95,8 @@ static void uninline(MVMThreadContext *tc, MVMFrame *f, MVMSpeshCandidate *cand,
/* Make a record for it on the stack; the MVMFrame is contained in
* it. Set up the frame. Note that this moves tc->stack_top, so we
* are now considered to be in this frame. */
MVMCallStackFrame *urecord = MVM_callstack_allocate_frame(tc, usf->body.work_size);
MVMCallStackFrame *urecord = MVM_callstack_allocate_frame(tc,
usf->body.work_size, usf->body.env_size);
MVMFrame *uf = &(urecord->frame);
MVM_frame_setup_deopt(tc, uf, usf, ucode);
uf->caller = caller;
Expand Down
31 changes: 7 additions & 24 deletions src/spesh/osr.c
Expand Up @@ -21,10 +21,12 @@ static MVMint32 get_osr_deopt_index(MVMThreadContext *tc, MVMSpeshCandidate *can
/* Does the jump into the optimized code. */
void perform_osr(MVMThreadContext *tc, MVMSpeshCandidate *specialized) {
/* Ensure there is space for the work area. */
if (specialized->body.work_size > tc->cur_frame->allocd_work) {
if (!MVM_callstack_ensure_work_space(tc, specialized->body.work_size)) {
if (specialized->body.work_size > tc->cur_frame->allocd_work ||
specialized->body.env_size > tc->cur_frame->allocd_env) {
if (!MVM_callstack_ensure_work_and_env_space(tc, specialized->body.work_size,
specialized->body.env_size)) {
#if MVM_LOG_OSR
fprintf(stderr, "Failed OSR as cannot grow work area for frame '%s' (cuid: %s)\n",
fprintf(stderr, "Failed OSR as cannot grow work/env area for frame '%s' (cuid: %s)\n",
MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name),
MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid));
#endif
Expand All @@ -46,32 +48,13 @@ void perform_osr(MVMThreadContext *tc, MVMSpeshCandidate *specialized) {
osr_index);
#endif

/* Ensure new area is zeroed out. */
/* Ensure new work and environment areas are zeroed out. */
if (specialized->body.work_size > tc->cur_frame->static_info->body.work_size) {
size_t keep_bytes = tc->cur_frame->static_info->body.num_locals * sizeof(MVMRegister);
size_t to_null = specialized->body.work_size - keep_bytes;
memset((char *)tc->cur_frame->work + keep_bytes, 0, to_null);
}

/* Resize environment if needed. */
if (specialized->body.num_lexicals > tc->cur_frame->static_info->body.num_lexicals) {
MVMRegister *new_env = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa,
specialized->body.env_size);
if (tc->cur_frame->allocd_env) {
memcpy(new_env, tc->cur_frame->env,
tc->cur_frame->static_info->body.num_lexicals * sizeof(MVMRegister));
MVM_fixed_size_free(tc, tc->instance->fsa, tc->cur_frame->allocd_env,
tc->cur_frame->env);
}
tc->cur_frame->env = new_env;
tc->cur_frame->allocd_env = specialized->body.env_size;
#if MVM_LOG_OSR
fprintf(stderr, "OSR resized environment of frame '%s' (cuid: %s)\n",
MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name),
MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid));
#endif
}
else if (specialized->body.env_size > tc->cur_frame->static_info->body.env_size) {
if (specialized->body.env_size > tc->cur_frame->static_info->body.env_size) {
size_t keep_bytes = tc->cur_frame->static_info->body.num_lexicals * sizeof(MVMRegister);
size_t to_null = specialized->body.env_size - keep_bytes;
memset((char *)tc->cur_frame->env + keep_bytes, 0, to_null);
Expand Down

0 comments on commit 7eca0ed

Please sign in to comment.