Skip to content

Commit

Permalink
Enable storing serialized data in bytecode section
Browse files Browse the repository at this point in the history
This avoids base64-encoding the serialized data into an MVMString at compile
time and base64-decoding it again at load time, speeding up both startup and
compilation, cutting the memory footprint, and decreasing the disk size of NQP and Rakudo compilation output.
  • Loading branch information
jnthn committed Jan 11, 2014
1 parent f2ce6f8 commit cf39b8f
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 20 deletions.
4 changes: 4 additions & 0 deletions src/6model/reprs/MVMCompUnit.h
Expand Up @@ -45,6 +45,10 @@ struct MVMCompUnitBody {
MVMString **strings;
MVMuint32 num_strings;

/* Serialized data, if any. */
char *serialized;
MVMint32 serialized_size;

/* Array of the resolved serialization contexts, and how many we
* have. A null in the list indicates not yet resolved */
MVMSerializationContext **scs;
Expand Down
56 changes: 39 additions & 17 deletions src/6model/serialization.c
Expand Up @@ -723,6 +723,17 @@ static MVMString * concatenate_outputs(MVMThreadContext *tc, MVMSerializationWri
MVM_exception_throw_adhoc(tc,
"Serialization sanity check failed: offset != output_size");

/* If we are compiling at present, then just stash the output for later
* incorporation into the bytecode file. */
if (tc->compiling_scs && MVM_repr_elems(tc, tc->compiling_scs) &&
MVM_repr_at_pos_o(tc, tc->compiling_scs, 0) == (MVMObject *)writer->root.sc) {
if (tc->serialized)
free(tc->serialized);
tc->serialized = output;
tc->serialized_size = output_size;
return NULL;
}

/* Base 64 encode. */
output_b64 = base64_encode(output, output_size);
free(output);
Expand Down Expand Up @@ -1065,7 +1076,9 @@ MVMString * MVM_serialization_serialize(MVMThreadContext *tc, MVMSerializationCo
/* Start serializing. */
serialize(tc, writer);

/* Build a single result MVMString out of the serialized data. */
/* Build a single result out of the serialized data; note if we're in the
* compiler pipeline this will return null and stash the output to write
* to a bytecode file later. */
result = concatenate_outputs(tc, writer);

/* Clear up afterwards. */
Expand Down Expand Up @@ -1135,7 +1148,7 @@ MVM_NO_RETURN
static void fail_deserialize(MVMThreadContext *tc, MVMSerializationReader *reader,
const char *messageFormat, ...) {
va_list args;
if (reader->data)
if (!(*tc->interp_cu)->body.serialized && reader->data)
free(reader->data);
if (reader->contexts)
free(reader->contexts);
Expand Down Expand Up @@ -1410,21 +1423,26 @@ static MVMSTable * read_stable_ref_func(MVMThreadContext *tc, MVMSerializationRe
* the reader data structure more fully. */
static void check_and_dissect_input(MVMThreadContext *tc,
MVMSerializationReader *reader, MVMString *data_str) {
/* Grab data from string. */
size_t data_len;
/* XXX TODO: create an internals-only interface so a string can
* be decoded into an existing buffer if it's big enough... then
* cache that buffer on threadcontext to avoid one of the
* allocations when decoding base64. */
char *data_b64 = (char *)MVM_string_ascii_encode(tc, data_str, NULL);
/* XXX TODO: extend base64_decode to take a pointer to a pointer
* to a destination buffer, and to decode to it if the buffer is
* big enough... then cache this buffer on the threadcontext to
* get rid of the other mallocation... */
char *data = (char *)base64_decode(data_b64, &data_len);
char *prov_pos = data;
char *data_end = data + data_len;
free(data_b64);
char *data;
char *prov_pos;
char *data_end;
if (data_str) {
/* Grab data from string. */
char *data_b64 = (char *)MVM_string_ascii_encode(tc, data_str, NULL);
data = (char *)base64_decode(data_b64, &data_len);
free(data_b64);
}
else {
/* Try to get it from the current compilation unit. */
data = (*tc->interp_cu)->body.serialized;
if (!data)
fail_deserialize(tc, reader,
"Failed to find deserialization data in compilation unit");
data_len = (*tc->interp_cu)->body.serialized_size;
}
prov_pos = data;
data_end = data + data_len;

/* Ensure we got the data. */
if (data == NULL)
Expand Down Expand Up @@ -1968,7 +1986,11 @@ void MVM_serialization_deserialize(MVMThreadContext *tc, MVMSerializationContext
deserialize_object(tc, reader, i, MVM_sc_get_object(tc, sc, i));

/* Clear up afterwards. */
if (reader->data)
if ((*tc->interp_cu)->body.serialized) {
(*tc->interp_cu)->body.serialized = NULL;
(*tc->interp_cu)->body.serialized_size = 0;
}
else if (reader->data)
free(reader->data);
if (reader->contexts)
free(reader->contexts);
Expand Down
13 changes: 11 additions & 2 deletions src/core/bytecode.c
Expand Up @@ -205,8 +205,17 @@ static ReaderState * dissect_bytecode(MVMThreadContext *tc, MVMCompUnit *cu) {
rs->string_seg = cu_body->data_start + offset;
rs->expected_strings = read_int32(cu_body->data_start, STRING_HEADER_OFFSET + 4);

/* TODO: SC data segment supposedly goes here.
* For now, just reserve 8 bytes. */
/* Get SC data, if any. */
offset = read_int32(cu_body->data_start, SCDATA_HEADER_OFFSET);
size = read_int32(cu_body->data_start, SCDATA_HEADER_OFFSET + 4);
if (offset > cu_body->data_size || offset + size > cu_body->data_size) {
cleanup_all(tc, rs);
MVM_exception_throw_adhoc(tc, "Serialized data segment overflows end of stream");
}
if (offset) {
cu_body->serialized = cu_body->data_start + offset;
cu_body->serialized_size = size;
}

/* Locate bytecode segment. */
offset = read_int32(cu_body->data_start, BYTECODE_HEADER_OFFSET);
Expand Down
5 changes: 5 additions & 0 deletions src/core/threadcontext.h
Expand Up @@ -161,6 +161,11 @@ struct MVMThreadContext {
* index 0. */
MVMObject *compiling_scs;

/* Memory buffer pointing to the last thing we serialized, intended to go
* into the next compilation unit we write. */
char *serialized;
MVMint32 serialized_size;

/* Dispatcher set for next invocation to take. */
MVMObject *cur_dispatcher;

Expand Down
13 changes: 12 additions & 1 deletion src/mast/compiler.c
Expand Up @@ -1149,6 +1149,8 @@ char * form_bytecode_output(VM, WriterState *ws, unsigned int *bytecode_size) {
size += ws->callsite_pos;
size += ws->bytecode_pos;
size += ws->annotation_pos;
if (vm->serialized)
size += vm->serialized_size;

/* Allocate space for the bytecode output. */
output = (char *)malloc(size);
Expand Down Expand Up @@ -1193,7 +1195,16 @@ char * form_bytecode_output(VM, WriterState *ws, unsigned int *bytecode_size) {
string_heap = NULL;
}

/* TODO: SC data. Write nothing for now. */
/* SC data. Write it if we have it. */
if (vm->serialized) {
write_int32(output, SCDATA_HEADER_OFFSET, pos);
write_int32(output, SCDATA_HEADER_OFFSET + 4, vm->serialized_size);
memcpy(output + pos, vm->serialized, vm->serialized_size);
pos += vm->serialized_size;
free(vm->serialized);
vm->serialized = NULL;
vm->serialized_size = 0;
}

/* Add bytecode section and its header entries (offset, length). */
write_int32(output, BYTECODE_HEADER_OFFSET, pos);
Expand Down

0 comments on commit cf39b8f

Please sign in to comment.