From b9bf0ac4178c453c84e0bdcf8e7b58cdbcf34073 Mon Sep 17 00:00:00 2001 From: dams Date: Thu, 10 Jul 2014 13:57:52 +0200 Subject: [PATCH] Erlang encoder / decoder --- Erlang/Sereal/Makefile | 20 + Erlang/Sereal/decoder/c_src/decoder.c | 1231 +++++++++++++++++ Erlang/Sereal/decoder/c_src/decoder.h | 11 + Erlang/Sereal/decoder/c_src/miniz.c | 1 + Erlang/Sereal/decoder/c_src/miniz.h | 1 + Erlang/Sereal/decoder/c_src/snappy | 1 + Erlang/Sereal/decoder/c_src/srl_protocol.h | 1 + Erlang/Sereal/decoder/c_src/uthash.h | 958 +++++++++++++ Erlang/Sereal/decoder/c_src/utils.c | 1 + Erlang/Sereal/decoder/c_src/utils.h | 1 + Erlang/Sereal/decoder/src/decoder.erl | 75 + Erlang/Sereal/encoder/c_src/encoder.c | 865 ++++++++++++ Erlang/Sereal/encoder/c_src/miniz.c | 1 + Erlang/Sereal/encoder/c_src/miniz.h | 1 + Erlang/Sereal/encoder/c_src/snappy | 1 + Erlang/Sereal/encoder/c_src/srl_protocol.h | 1 + Erlang/Sereal/encoder/c_src/utils.c | 1 + Erlang/Sereal/encoder/c_src/utils.h | 1 + Erlang/Sereal/encoder/src/encoder.erl | 66 + Erlang/Sereal/rebar.config | 41 + Erlang/Sereal/shared/utils.c | 91 ++ Erlang/Sereal/shared/utils.h | 41 + Erlang/Sereal/src/sereal.app.src | 6 + Erlang/Sereal/src/sereal.erl | 15 + Erlang/Sereal/test/basic_tests.erl | 50 + Erlang/Sereal/test/cases/all/test10.eterm | 1 + Erlang/Sereal/test/cases/all/test10.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test11.eterm | 1 + Erlang/Sereal/test/cases/all/test11.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test12.eterm | 1 + Erlang/Sereal/test/cases/all/test12.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test13.eterm | 1 + Erlang/Sereal/test/cases/all/test13.srl | Bin 0 -> 8 bytes Erlang/Sereal/test/cases/all/test14.eterm | 1 + Erlang/Sereal/test/cases/all/test14.srl | Bin 0 -> 10 bytes Erlang/Sereal/test/cases/all/test15.eterm | 1 + Erlang/Sereal/test/cases/all/test15.srl | Bin 0 -> 3009 bytes Erlang/Sereal/test/cases/all/test19.eterm | 1 + Erlang/Sereal/test/cases/all/test19.srl | Bin 0 -> 9 
bytes Erlang/Sereal/test/cases/all/test21.eterm | 1 + Erlang/Sereal/test/cases/all/test21.srl | Bin 0 -> 9 bytes Erlang/Sereal/test/cases/all/test22.eterm | 1 + Erlang/Sereal/test/cases/all/test22.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test23.eterm | 1 + Erlang/Sereal/test/cases/all/test23.srl | Bin 0 -> 12 bytes Erlang/Sereal/test/cases/all/test24.eterm | 1 + Erlang/Sereal/test/cases/all/test24.srl | Bin 0 -> 17 bytes Erlang/Sereal/test/cases/all/test25.eterm | 2 + Erlang/Sereal/test/cases/all/test25.srl | Bin 0 -> 22 bytes Erlang/Sereal/test/cases/all/test26.eterm | 2 + Erlang/Sereal/test/cases/all/test26.srl | Bin 0 -> 16 bytes Erlang/Sereal/test/cases/all/test6.eterm | 1 + Erlang/Sereal/test/cases/all/test6.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test7.eterm | 1 + Erlang/Sereal/test/cases/all/test7.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test8.eterm | 1 + Erlang/Sereal/test/cases/all/test8.srl | Bin 0 -> 7 bytes Erlang/Sereal/test/cases/all/test9.eterm | 1 + Erlang/Sereal/test/cases/all/test9.srl | Bin 0 -> 7 bytes .../test/cases/arrayref_list/test17.eterm | 1 + .../test/cases/arrayref_list/test17.srl | Bin 0 -> 7 bytes .../test/cases/arrayref_list/test18.eterm | 1 + .../test/cases/arrayref_list/test18.srl | Bin 0 -> 10 bytes .../test/cases/arrayref_list/test2.eterm | 1 + .../Sereal/test/cases/arrayref_list/test2.srl | Bin 0 -> 20 bytes .../test/cases/arrayref_list/test20.eterm | 1 + .../test/cases/arrayref_list/test20.srl | Bin 0 -> 2868 bytes .../test/cases/arrayref_list/test27.eterm | 2 + .../test/cases/arrayref_list/test27.srl | Bin 0 -> 18 bytes .../test/cases/arrayref_list/test29.eterm | 2 + .../test/cases/arrayref_list/test29.srl | Bin 0 -> 14 bytes Erlang/Sereal/test/cases/maps_map/test5.eterm | 14 + Erlang/Sereal/test/cases/maps_map/test5.srl | Bin 0 -> 91 bytes .../Sereal/test/cases/maps_tuple/test1.eterm | 5 + Erlang/Sereal/test/cases/maps_tuple/test1.srl | Bin 0 -> 19 bytes 
.../Sereal/test/cases/maps_tuple/test3.eterm | 1 + Erlang/Sereal/test/cases/maps_tuple/test3.srl | Bin 0 -> 17 bytes .../Sereal/test/cases/maps_tuple/test4.eterm | 13 + Erlang/Sereal/test/cases/maps_tuple/test4.srl | Bin 0 -> 61 bytes Erlang/Sereal/test/decoder_test.beam | Bin 0 -> 1208 bytes Erlang/Sereal/test/looks_like_sereal_test.erl | 22 + Erlang/Sereal/test/round_trip.erl | 105 ++ 82 files changed, 3669 insertions(+) create mode 100644 Erlang/Sereal/Makefile create mode 100644 Erlang/Sereal/decoder/c_src/decoder.c create mode 100644 Erlang/Sereal/decoder/c_src/decoder.h create mode 120000 Erlang/Sereal/decoder/c_src/miniz.c create mode 120000 Erlang/Sereal/decoder/c_src/miniz.h create mode 120000 Erlang/Sereal/decoder/c_src/snappy create mode 120000 Erlang/Sereal/decoder/c_src/srl_protocol.h create mode 100644 Erlang/Sereal/decoder/c_src/uthash.h create mode 120000 Erlang/Sereal/decoder/c_src/utils.c create mode 120000 Erlang/Sereal/decoder/c_src/utils.h create mode 100644 Erlang/Sereal/decoder/src/decoder.erl create mode 100644 Erlang/Sereal/encoder/c_src/encoder.c create mode 120000 Erlang/Sereal/encoder/c_src/miniz.c create mode 120000 Erlang/Sereal/encoder/c_src/miniz.h create mode 120000 Erlang/Sereal/encoder/c_src/snappy create mode 120000 Erlang/Sereal/encoder/c_src/srl_protocol.h create mode 120000 Erlang/Sereal/encoder/c_src/utils.c create mode 120000 Erlang/Sereal/encoder/c_src/utils.h create mode 100644 Erlang/Sereal/encoder/src/encoder.erl create mode 100644 Erlang/Sereal/rebar.config create mode 100644 Erlang/Sereal/shared/utils.c create mode 100644 Erlang/Sereal/shared/utils.h create mode 100644 Erlang/Sereal/src/sereal.app.src create mode 100644 Erlang/Sereal/src/sereal.erl create mode 100644 Erlang/Sereal/test/basic_tests.erl create mode 100644 Erlang/Sereal/test/cases/all/test10.eterm create mode 100644 Erlang/Sereal/test/cases/all/test10.srl create mode 100644 Erlang/Sereal/test/cases/all/test11.eterm create mode 100644 
Erlang/Sereal/test/cases/all/test11.srl create mode 100644 Erlang/Sereal/test/cases/all/test12.eterm create mode 100644 Erlang/Sereal/test/cases/all/test12.srl create mode 100644 Erlang/Sereal/test/cases/all/test13.eterm create mode 100644 Erlang/Sereal/test/cases/all/test13.srl create mode 100644 Erlang/Sereal/test/cases/all/test14.eterm create mode 100644 Erlang/Sereal/test/cases/all/test14.srl create mode 100644 Erlang/Sereal/test/cases/all/test15.eterm create mode 100644 Erlang/Sereal/test/cases/all/test15.srl create mode 100644 Erlang/Sereal/test/cases/all/test19.eterm create mode 100644 Erlang/Sereal/test/cases/all/test19.srl create mode 100644 Erlang/Sereal/test/cases/all/test21.eterm create mode 100644 Erlang/Sereal/test/cases/all/test21.srl create mode 100644 Erlang/Sereal/test/cases/all/test22.eterm create mode 100644 Erlang/Sereal/test/cases/all/test22.srl create mode 100644 Erlang/Sereal/test/cases/all/test23.eterm create mode 100644 Erlang/Sereal/test/cases/all/test23.srl create mode 100644 Erlang/Sereal/test/cases/all/test24.eterm create mode 100644 Erlang/Sereal/test/cases/all/test24.srl create mode 100644 Erlang/Sereal/test/cases/all/test25.eterm create mode 100644 Erlang/Sereal/test/cases/all/test25.srl create mode 100644 Erlang/Sereal/test/cases/all/test26.eterm create mode 100644 Erlang/Sereal/test/cases/all/test26.srl create mode 100644 Erlang/Sereal/test/cases/all/test6.eterm create mode 100644 Erlang/Sereal/test/cases/all/test6.srl create mode 100644 Erlang/Sereal/test/cases/all/test7.eterm create mode 100644 Erlang/Sereal/test/cases/all/test7.srl create mode 100644 Erlang/Sereal/test/cases/all/test8.eterm create mode 100644 Erlang/Sereal/test/cases/all/test8.srl create mode 100644 Erlang/Sereal/test/cases/all/test9.eterm create mode 100644 Erlang/Sereal/test/cases/all/test9.srl create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test17.eterm create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test17.srl create mode 100644 
Erlang/Sereal/test/cases/arrayref_list/test18.eterm
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test18.srl
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test2.eterm
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test2.srl
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test20.eterm
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test20.srl
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test27.eterm
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test27.srl
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test29.eterm
 create mode 100644 Erlang/Sereal/test/cases/arrayref_list/test29.srl
 create mode 100644 Erlang/Sereal/test/cases/maps_map/test5.eterm
 create mode 100644 Erlang/Sereal/test/cases/maps_map/test5.srl
 create mode 100644 Erlang/Sereal/test/cases/maps_tuple/test1.eterm
 create mode 100644 Erlang/Sereal/test/cases/maps_tuple/test1.srl
 create mode 100644 Erlang/Sereal/test/cases/maps_tuple/test3.eterm
 create mode 100644 Erlang/Sereal/test/cases/maps_tuple/test3.srl
 create mode 100644 Erlang/Sereal/test/cases/maps_tuple/test4.eterm
 create mode 100644 Erlang/Sereal/test/cases/maps_tuple/test4.srl
 create mode 100644 Erlang/Sereal/test/decoder_test.beam
 create mode 100644 Erlang/Sereal/test/looks_like_sereal_test.erl
 create mode 100644 Erlang/Sereal/test/round_trip.erl

diff --git a/Erlang/Sereal/Makefile b/Erlang/Sereal/Makefile
new file mode 100644
index 000000000..e7a3ef0b6
--- /dev/null
+++ b/Erlang/Sereal/Makefile
@@ -0,0 +1,28 @@
+# Convenience wrapper around rebar for the Sereal Erlang NIFs.
+#   DEBUG and MAPS are handed to `rebar compile` as -D DEBUG=... and
+#   -D SEREAL_MAP_SUPPORT=...; override them on the command line (make MAPS=1).
+# C_OUT lists the object files `clean` removes; the C sources of this tree
+# live under decoder/c_src and encoder/c_src.
+C_OUT=c_src/*.o decoder/c_src/*.o encoder/c_src/*.o
+DEBUG=0
+MAPS=0
+
+all: deps eunit
+
+clean:
+	rm -f $(C_OUT)
+	rebar clean
+
+deps:
+	rebar get-deps
+
+compile: clean
+	rebar compile -D DEBUG=$(DEBUG) -D SEREAL_MAP_SUPPORT=$(MAPS)
+
+eunit: compile
+	rebar eunit
+
+# Every target is a command, not a file. Without this, a file or directory
+# named after a target (rebar get-deps normally creates deps/) makes make
+# treat that target as up to date and skip it.
+.PHONY: all clean deps compile eunit
diff --git a/Erlang/Sereal/decoder/c_src/decoder.c b/Erlang/Sereal/decoder/c_src/decoder.c
new file mode 100644
index 000000000..9725514f2
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/decoder.c
@@ -0,0 +1,1231 @@ + +#include +#include +#include +#include +#include + +#include "erl_nif.h" +#include "decoder.h" +#include "srl_protocol.h" +#include "utils.h" + +#include "snappy/csnappy_decompress.c" +#include "miniz.h" + +#include "uthash.h" + +#define STACK_SIZE_INCR 64 +#define NUM_BUF_LEN 32 + +#define DECODE_ARRAYREF_TO_LIST 1 + +#if WINDOWS || WIN32 +#define snprintf _snprintf +#endif + +#define IS_SRL_HDR_ARRAYREF(tag) (((tag) & SRL_HDR_ARRAYREF) == SRL_HDR_ARRAYREF) +#define IS_SRL_HDR_HASHREF(tag) (((tag) & SRL_HDR_HASHREF) == SRL_HDR_HASHREF) +#define IS_SRL_HDR_SHORT_BINARY(tag) (((tag) & SRL_HDR_SHORT_BINARY_LOW) == SRL_HDR_SHORT_BINARY_LOW) +#define SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag) ((tag) & SRL_MASK_SHORT_BINARY_LEN) + +enum { + + ST_DONE, + ST_VALUE, + ST_INVALID, + + ST_ARRAY_CLOSE, + ST_HASH_PAIR, + ST_HASH_CLOSE, + + ST_JUMP, + ST_JUMP_FROM_ZERO, + + ST_TRACK + +} SrlState; + +typedef struct { + ErlNifEnv* env; + SerealConstants* atoms; + + ERL_NIF_TERM input; + ErlNifBinary bin; + + + char* buffer; + int pos; + int len; + + char* status_stask_data; + int status_stack_size; + int status_stack_top; + + int* ref_stack_data; + int ref_stack_size; + int ref_stack_top; + + int header_parsed; + int body_pos; + + size_t bytes_per_iter; + int options; + +} Decoder; + +// ------------------------------------------------------- + +ErlNifUInt64 srl_read_varint_int64_nocheck(Decoder *d); +ERL_NIF_TERM dec_error(Decoder* d, const char* atom); + +// ------------------------------------------------------- + +struct reference_struct { + int pos; /* key, the position in the srl document */ + ERL_NIF_TERM term; + UT_hash_handle hh; /* makes this structure hashable */ +}; + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + SerealConstants* st = enif_alloc(sizeof(SerealConstants)); + if(st == NULL) { + // no diagnostics? 
+ return 1; + } + + init_sereal_constants(env, st); + + st->resource_decoder = enif_open_resource_type ( + env, + NULL, + "decoder", + decoder_destroy, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, + NULL + ); + + *priv = (void*) st; + + return 0; +} + +static int +reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + return 0; +} + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + +static ErlNifFunc funcs[] = +{ + {"nif_decoder_init", 2, decoder_init}, + {"nif_decoder_iterate", 4, decoder_iterate} +}; + +ERL_NIF_INIT(decoder, funcs, &load, &reload, &upgrade, &unload); + +struct reference_struct *references_hash = NULL; /* important! initialize to NULL */ + +void add_reference(int pos_to_add, ERL_NIF_TERM val) { + struct reference_struct *s; + + s = malloc(sizeof(struct reference_struct)); + s->pos = pos_to_add; + s->term = val; + HASH_ADD_INT( references_hash, pos, s ); /* pos: name of key field */ + debug_print("added item from pos %d\n", s->pos); +} + +struct reference_struct *find_reference(int pos) { + struct reference_struct *s; + HASH_FIND_INT( references_hash, &pos, s ); /* s: output pointer */ + return s; +} + + +Decoder* +decoder_new(ErlNifEnv* env) +{ + SerealConstants* st = (SerealConstants*) enif_priv_data(env); + + Decoder* result = enif_alloc_resource(st->resource_decoder, sizeof(Decoder)); + + if(result == NULL) { + return NULL; + } + + result->header_parsed = 0; + result->atoms = st; + + result->bytes_per_iter = 4096; + + result->pos = -1; + result->len = -1; + result->buffer = NULL; + + // status stack + result->status_stask_data = (char*) enif_alloc(STACK_SIZE_INCR * sizeof(char)); + + if(result->status_stask_data == NULL){ + dec_error(result, "Stack allocation failed"); + return NULL; + } + + result->status_stack_size = STACK_SIZE_INCR; + result->status_stack_top = 0; + 
result->options = 0; + + memset(result->status_stask_data, ST_INVALID, result->status_stack_size); + + result->status_stask_data[0] = ST_DONE; + result->status_stack_top++; + + result->status_stask_data[1] = ST_VALUE; + result->status_stack_top++; + + + // references stack + + result->ref_stack_data = (int*) enif_alloc(STACK_SIZE_INCR * sizeof(int)); + + if(result->ref_stack_data == NULL){ + dec_error(result, "Stack allocation failed"); + return NULL; + } + + result->ref_stack_size = STACK_SIZE_INCR; + result->ref_stack_top = 0; + + return result; +} + +void +dec_init(Decoder* decoder, ErlNifEnv* env, ERL_NIF_TERM input, ErlNifBinary* bin) +{ + decoder->env = env; + decoder->input = input; + + decoder->buffer = (char*) bin->data; + decoder->len = bin->size; + + // pos'd like to be more forceful on this check so that when + // we run a second iteration of the decoder we are sure + // that we're using the same binary. Unfortunately, I don't + // think there's a value to base this assertion on. 
+ if(decoder->pos < 0) { + decoder->pos = 0; + + } else { + assert(decoder->pos <= decoder->len && "mismatched binary lengths"); + } +} + +void +decoder_destroy(ErlNifEnv* env, void* obj) +{ + Decoder* decoder = (Decoder*) obj; + + if(decoder->status_stask_data != NULL) { + enif_free(decoder->status_stask_data); + } +} + +ERL_NIF_TERM +dec_error(Decoder* d, const char* atom) +{ + ERL_NIF_TERM pos = enif_make_int(d->env, d->pos+1); + ERL_NIF_TERM msg = make_atom(d->env, atom); + ERL_NIF_TERM ret = enif_make_tuple2(d->env, pos, msg); + + return enif_make_tuple2(d->env, d->atoms->atom_error, ret); +} + +char +dec_current(Decoder* d) +{ + char result; + + if(d->status_stack_top > 0){ + result = d->status_stask_data[d->status_stack_top-1]; + + } else { + result = 0; + dec_error(d, "Fetching data from empty stack"); + } + + return result; +} + + +int +status_stack_top(Decoder* d) +{ + return d->status_stack_top; +} + +void +status_stack_push(Decoder* d, char val) +{ + if(d->status_stack_top >= d->status_stack_size) { + int new_sz = d->status_stack_size + STACK_SIZE_INCR; + char* tmp = (char*) enif_alloc(new_sz * sizeof(char)); + + memset(tmp, ST_INVALID, new_sz); + memcpy(tmp, d->status_stask_data, d->status_stack_size * sizeof(char)); + + enif_free(d->status_stask_data); + + d->status_stask_data = tmp; + d->status_stack_size = new_sz; + } + + d->status_stask_data[d->status_stack_top++] = val; +} + +void +status_stack_pop(Decoder* decoder, char val) +{ + assert(decoder->status_stask_data[decoder->status_stack_top-1] == val && "Popped invalid state."); + + if(decoder->status_stack_top > 0){ + decoder->status_stask_data[decoder->status_stack_top-1] = ST_INVALID; + decoder->status_stack_top--; + + } else { + dec_error(decoder, "Stack is empty"); + } +} + + + +void +ref_stack_push(Decoder* d, int val) +{ + if(d->ref_stack_top >= d->ref_stack_size) { + int new_sz = d->ref_stack_size + STACK_SIZE_INCR; + int* tmp = (int*) enif_alloc(new_sz * sizeof(int)); + + memcpy(tmp, 
d->ref_stack_data, d->ref_stack_size * sizeof(int)); + + enif_free(d->ref_stack_data); + + d->ref_stack_data = tmp; + d->ref_stack_size = new_sz; + } + + d->ref_stack_data[d->ref_stack_top++] = val; +} + +int +ref_stack_pop(Decoder* d) +{ + int val = 0; + if (d->ref_stack_top > 0){ + val = d->ref_stack_data[d->ref_stack_top-1]; + d->ref_stack_top--; + } else { + dec_error(d, "Stack is empty"); + } + return val; +} + +ERL_NIF_TERM +make_array(ErlNifEnv* env, ERL_NIF_TERM list) +{ + ERL_NIF_TERM item; + + ERL_NIF_TERM result = enif_make_list(env, 0); + while(enif_get_list_cell(env, list, &item, &list)) { + result = enif_make_list_cell(env, item, result); + } + + return result; +} + +ERL_NIF_TERM +decoder_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + /* argv[] should contain Data and Options */ + if(argc != 2) { + return enif_make_badarg(env); + } + + SerealConstants* st = (SerealConstants*) enif_priv_data(env); + + Decoder* decoder = decoder_new(env); + + if(decoder == NULL) { + return make_error(st, env, "Internal_error 1"); + } + + ERL_NIF_TERM arguments[] = { + argv[0], + enif_make_resource(env, decoder), + enif_make_list(env, 0), + enif_make_list(env, 0) + }; + + enif_release_resource(decoder); + + ERL_NIF_TERM opts = argv[1]; + if(!enif_is_list(env, opts)) { + return enif_make_badarg(env); + } + + // TODO: add support for other options: + // refuse_snappy, refuse_objects, no_bless_objects, + // validate_utf8(?), max_num_hash_entries, incremental(?), use_undef + int arity; + const ERL_NIF_TERM* tuple; + + ERL_NIF_TERM option; + while (enif_get_list_cell(env, opts, &option, &opts)) { + + if(!enif_get_tuple(env, option, &arity, &tuple)) { + return parse_error( st, env, "Options should be tuple", option ); + } + + if( !enif_compare(tuple[0], st->atom_bytes_per_iter) ) { + if ( arity == 2 && enif_get_uint(env, tuple[1], &decoder->bytes_per_iter) ) { + + } else{ + return parse_error( st, env, "Bytes per iteration should be a number value", option ); + 
} + + } else if ( !enif_compare(tuple[0] , st->atom_arrayref_to_list) ) { + decoder->options |= DECODE_ARRAYREF_TO_LIST; + + } else { + return parse_error( st, env, "Not supported option: ", tuple[0] ); + } + } + + + return decoder_iterate( env + , sizeof(arguments) / sizeof(arguments[0]) + , arguments); +} + +ERL_NIF_TERM +decoder_iterate(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + SerealConstants* st = (SerealConstants*) enif_priv_data(env); + + Decoder* decoder; + ErlNifBinary input; + + if( argc != 4 + || !enif_inspect_binary(env, argv[0], &input) + || !enif_get_resource(env, argv[1], st->resource_decoder, (void**) &decoder) + || !enif_is_list(env, argv[2]) + || !enif_is_list(env, argv[3])) { + + return enif_make_badarg(env); + } + + dec_init(decoder, env, argv[0], &input); + ERL_NIF_TERM objs = argv[2]; + ERL_NIF_TERM curr = argv[3]; + + size_t start = decoder->pos; + debug_print("Starting parsing from pos: %zu\n", start); + + ErlNifSInt64 header_size; + + /* First time: check header */ + if(! decoder->header_parsed ) { + + debug_print("Parsing header from %zu\n", start); + + decoder->header_parsed = 1; + + const char* errorMsg = NULL; + + int magic_string = 1, + high_magic_string = 1; + + int version_encoding; + int version; + int encoding_flags; + + int is_zlib_encoded = 0; + int is_snappy_encoded = 0; + int is_snappyincr_encoded = 0; + + // SRL_MAGIC_STRLEN + PROTOCOL_LENGTH + OPTIONAL-HEADER-SIZE(at least 1 byte) + DATA(at least 1 byte) + if (decoder->len < SRL_MAGIC_STRLEN + 1 + 1 + 1){ + errorMsg = "Sereal lacks data"; + + // yeah, this is correct. 
check usage + } else if ( (high_magic_string = strncmp((const char*) decoder->buffer, SRL_MAGIC_STRING, SRL_MAGIC_STRLEN)) + && (magic_string = strncmp((const char*) decoder->buffer, SRL_MAGIC_STRING_HIGHBIT, SRL_MAGIC_STRLEN))) { + + errorMsg = "Wrong magic string for Sereal"; + + } + + version_encoding = decoder->buffer[SRL_MAGIC_STRLEN]; + version = version_encoding & SRL_PROTOCOL_VERSION_MASK; + encoding_flags = version_encoding & SRL_PROTOCOL_ENCODING_MASK; + + if ( version <= 0 + || ( version < 3 && high_magic_string ) + || ( version > 2 && magic_string ) ) { + errorMsg = "Unsupported Sereal versions/protocol"; + } + + switch(encoding_flags) { + + case SRL_PROTOCOL_ENCODING_RAW: + /* no op */ + debug_print("Encoding is Raw\n"); + break; + + case SRL_PROTOCOL_ENCODING_SNAPPY: + debug_print("Encoding is Snappy\n"); + is_snappy_encoded = 1; + break; + + case SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL: + debug_print("Encoding is Snappy Incr\n"); + is_snappy_encoded = is_snappyincr_encoded = 1; + break; + + case SRL_PROTOCOL_ENCODING_ZLIB: + debug_print("Encoding is Zlib\n"); + is_zlib_encoded = 1; + break; + + default: + errorMsg = "Sereal document encoded in an unknown format"; + } + + if(errorMsg != NULL){ + return dec_error(decoder, errorMsg); + } + + debug_print("Header version is %d\n", version); + + // move after magic string and protocol version + decoder->pos += SRL_MAGIC_STRLEN + 1; + + header_size = srl_read_varint_int64_nocheck(decoder); + + debug_print("header size: %lu, d->pos = %d\n", header_size, decoder->pos); + + //TODO: add code for processing the header + decoder->pos += header_size; + + ErlNifSInt64 compressed_len; + uint32_t uncompressed_len; + int decompress_ok; + int header_len; + ErlNifBinary uncompressed; + + if (version < 2) { + decoder->body_pos = 0; + } else { + decoder->body_pos = decoder->pos - 1; + } + + if (is_snappy_encoded) { + if (is_snappyincr_encoded) { + compressed_len = srl_read_varint_int64_nocheck(decoder); + } else { + 
compressed_len = decoder->len - decoder->pos; + } + debug_print("snappy compressed len %lu\n", compressed_len); + + // decoder->pos is now at start of compressed payload + debug_print("unsnappying\n"); + + unsigned char *old_pos; + old_pos = (unsigned char * ) (decoder->buffer + decoder->pos * sizeof(unsigned char) ); + header_len = csnappy_get_uncompressed_length( + (char *)old_pos, + compressed_len, + &uncompressed_len + ); + if (header_len == CSNAPPY_E_HEADER_BAD) { + return dec_error(decoder, "Invalid Snappy header in Snappy-compressed Sereal packet"); + } + + /* allocate a new buffer for uncompressed data*/ + enif_alloc_binary((size_t) uncompressed_len, &uncompressed); + + decompress_ok = csnappy_decompress_noheader((char *) (old_pos + header_len), + compressed_len - header_len, + (char *) uncompressed.data, + &uncompressed_len); + if ( decompress_ok != 0 ) { + return dec_error(decoder, "Snappy decompression of Sereal packet payload failed"); + } + debug_print(" decompress OK: %s\n", uncompressed.data); + + // we fix decoder pos and len, then immediately return, to allow + // Erlang to iterate again. 
No need to set input and buffer, + // because they'll be irrelevant as soon as we return to Erlang, + // and will be set again at the start of the next iteration + + decoder->pos = 0; + decoder->len = uncompressed_len; + decoder->body_pos = -1; + + ERL_NIF_TERM new_input = enif_make_binary(env, &uncompressed); + + return enif_make_tuple5 ( + env, + st->atom_partial, + argv[1], + objs, + curr, + new_input + ); + + } else if (is_zlib_encoded) { + + ErlNifSInt64 uncompressed_len = srl_read_varint_int64_nocheck(decoder); + ErlNifSInt64 compressed_len = srl_read_varint_int64_nocheck(decoder); + + // decoder->pos is now at start of compressed payload + debug_print("unzipping\n"); + debug_print(" compressed_len : %ld\n", compressed_len); + debug_print(" uncompressed_len : %ld\n", uncompressed_len); + + + mz_ulong tmp = uncompressed_len; + ErlNifBinary uncompressed; + + /* allocate a new buffer for uncompressed data*/ + enif_alloc_binary((size_t) uncompressed_len, &uncompressed); + + unsigned char *compressed = (unsigned char * ) (decoder->buffer + decoder->pos * sizeof(unsigned char) ); + + int decompress_ok = mz_uncompress( + (unsigned char *) uncompressed.data, + &tmp, + (const unsigned char *) compressed, + compressed_len + ); + + debug_print(" decompress OK: %i\n", decompress_ok); + if (decompress_ok != Z_OK) { + return dec_error(decoder, "ZLIB decompression of Sereal packet payload failed"); + } + + // we fix decoder pos and len, then immediately return, to allow + // Erlang to iterate again. 
No need to set input and buffer, + // because they'll be irrelevant as soon as we return to Erlang, + // and will be set again at the start of the next iteration + + decoder->pos = 0; + decoder->len = tmp; + decoder->body_pos = -1; + + ERL_NIF_TERM new_input = enif_make_binary(env, &uncompressed); + + return enif_make_tuple5( + env, + st->atom_partial, + argv[1], + objs, + curr, + new_input + ); + } + + + } + + int len; + float float_value; + double double_value; + + ErlNifSInt64 int64_value; + ERL_NIF_TERM key; + ERL_NIF_TERM val = decoder->atoms->atom_undefined; + ERL_NIF_TERM result; + + while(1) { + + debug_print("==LOOP== iter. state: %d, pos = %d\n", dec_current(decoder), decoder->pos); + + /* check for processed data to bypass starvation */ + if(should_yield(decoder->pos - start, decoder->bytes_per_iter)) { + + debug_print("==YIELDING== state: %d, pos = %d\n", dec_current(decoder), decoder->pos); + + consume_timeslice(env, decoder->pos - start, decoder->bytes_per_iter); + + return enif_make_tuple4( + env, + st->atom_partial, + argv[1], + objs, + curr + ); + } + + if ( dec_current(decoder) == ST_DONE ) { + + debug_print("current state: ST_DONE\n"); + + if(decoder->pos == decoder->len){ + result = make_ok(st, env, curr); + + } else { + result = dec_error(decoder, "Wrong structured Sereal"); + } + + goto done; + } + + if ( dec_current(decoder) == ST_TRACK ) { + + ERL_NIF_TERM item_to_track; + + ERL_NIF_TERM ignore_me; + + status_stack_pop(decoder, ST_TRACK); + + debug_print("current state: ST_TRACK\n"); + if (! 
enif_get_list_cell(env, curr, &item_to_track, &ignore_me)) { + result = dec_error(decoder, "internal_error"); + goto done; + } + debug_print("got item\n"); + int pos = ref_stack_pop(decoder); + debug_print("adding item for pos %d\n", pos); + add_reference(pos, item_to_track); + continue; + } + + if ( dec_current(decoder) == ST_JUMP ) { + status_stack_pop(decoder, ST_JUMP); + int jump; + jump = ref_stack_pop(decoder); + debug_print("JUMPING TO %d + %d\n", jump, decoder->body_pos); + decoder->pos = jump + decoder->body_pos; + continue; + } + + if ( dec_current(decoder) == ST_JUMP_FROM_ZERO ) { + status_stack_pop(decoder, ST_JUMP_FROM_ZERO); + int jump; + jump = ref_stack_pop(decoder); + debug_print("JUMPING TO %d\n", jump); + decoder->pos = jump; + continue; + } + + if ( dec_current(decoder) == ST_ARRAY_CLOSE ) { + + status_stack_pop(decoder, ST_ARRAY_CLOSE); + + /* if option is on we convert ARRAYREFs to Erlang lists, not arrays */ + if (decoder->options & DECODE_ARRAYREF_TO_LIST) { + val = make_array(env, curr); + + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + result = dec_error(decoder, "Internal_error 2"); + goto done; + } + + curr = enif_make_list_cell(env, val, curr); + continue; + + } else { + // arrays are converted in Erlang to moduled arrays + return enif_make_tuple4 ( + env, + st->atom_convert, + argv[1], + objs, + curr + ); + } + + } + + if ( dec_current(decoder) == ST_HASH_PAIR ) { + + status_stack_pop(decoder, ST_HASH_PAIR); + + enif_get_list_cell(env, curr, &val, &curr); + enif_get_list_cell(env, curr, &key, &curr); + +#if SEREAL_MAP_SUPPORT + ERL_NIF_TERM new_map; + if ( !enif_make_map_put(env, curr, key, val, &new_map) ){ + return make_error(st, env, "Map decoding failed: ill-formed key-value pair"); + } + curr = new_map; +#else + val = enif_make_tuple2(env, key, val); + curr = enif_make_list_cell(env, val, curr); +#endif + + continue; + } + + if ( dec_current(decoder) == ST_HASH_CLOSE ) { + + status_stack_pop(decoder, ST_HASH_CLOSE); + +#if 
SEREAL_MAP_SUPPORT + val = curr; +#else + /* val = make_hash(env, curr, list_len); */ + val = enif_make_tuple1(env, curr); +#endif + + if(!enif_get_list_cell(env, objs, &curr, &objs)) { + result = dec_error(decoder, "internal_error 3"); + goto done; + } + + curr = enif_make_list_cell(env, val, curr); + continue; + } + + if (decoder->pos >= decoder->len) { + result = dec_error(decoder, "internal_error 4"); + goto done; + break; + } + + unsigned char tag = decoder->buffer[decoder->pos]; + + if (tag & SRL_HDR_TRACK_FLAG) { + tag = tag & ~SRL_HDR_TRACK_FLAG; + debug_print("tag must be tracked\n"); + + ref_stack_push(decoder, decoder->pos - decoder->body_pos); + + status_stack_pop(decoder, ST_VALUE); + status_stack_push(decoder, ST_TRACK); + status_stack_push(decoder, ST_VALUE); + debug_print("pushed ST_TRACK\n"); + } + + switch(dec_current(decoder)) { + case ST_VALUE: + debug_print("current state ST_VALUE\n"); + + if ( tag <= SRL_HDR_POS_HIGH ) { + debug_print("POSITIVE INTEGER tag %d, d->pos = %d\n", (int)tag, decoder->pos); + + decoder->pos++; + status_stack_pop(decoder, ST_VALUE); + + val = enif_make_int(decoder->env, (int)tag); + curr = enif_make_list_cell(env, val, curr); + + } else if ( tag <= SRL_HDR_NEG_HIGH) { + debug_print("NEGATIVE INTEGER tag %d, d->pos = %d\n", (int)tag, decoder->pos); + + decoder->pos++; + status_stack_pop(decoder, ST_VALUE); + + /* Small NEGs are from 16 to 31 in reverse order: (-16, -15.. 
, -1) */ + val = enif_make_int(decoder->env, (int)tag - 32); + curr = enif_make_list_cell(env, val, curr); + + } else if ( IS_SRL_HDR_SHORT_BINARY(tag) ) { + + len = SRL_HDR_SHORT_BINARY_LEN_FROM_TAG(tag); + decoder->pos++; + + debug_print("SHORT_BINARY of len %d, d->pos = %d\n", len, decoder->pos); + status_stack_pop(decoder, ST_VALUE); + + val = enif_make_sub_binary(decoder->env, decoder->input, decoder->pos, len); + curr = enif_make_list_cell(env, val, curr); + + debug_print("SHORT_BINARY value = %*c\n", len, tag); + decoder->pos += len; + + } else if ( IS_SRL_HDR_HASHREF(tag) ) { + + len = tag & 0xF; + decoder->pos++; + + debug_print("SHORT HASHREF of len %d, d->pos = %d\n", len, decoder->pos); + status_stack_pop(decoder, ST_VALUE); + + objs = enif_make_list_cell(env, curr, objs); + +#if SEREAL_MAP_SUPPORT + curr = enif_make_new_map(env); +#else + // create the temp list to store array elements in it + curr = enif_make_list(env, 0); +#endif + status_stack_push(decoder, ST_HASH_CLOSE); + while (len-- > 0) { + status_stack_push(decoder, ST_HASH_PAIR); + status_stack_push(decoder, ST_VALUE); + status_stack_push(decoder, ST_VALUE); + } + + } else if ( IS_SRL_HDR_ARRAYREF(tag) ) { + + len = tag & 0xF; + decoder->pos++; + + debug_print("SHORT ARRAY of len %d, d->pos = %d\n", len, decoder->pos); + status_stack_pop(decoder, ST_VALUE); + + // create the temp list to store array elements in it + objs = enif_make_list_cell(env, curr, objs); + curr = enif_make_list(env, 0); + + status_stack_push(decoder, ST_ARRAY_CLOSE); + while (len-- > 0) { + status_stack_push(decoder, ST_VALUE); + } + + } else { + + switch(tag) { + + case SRL_HDR_VARINT: + debug_print("VARINT, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + int64_value = srl_read_varint_int64_nocheck(decoder); + + val = enif_make_int64(decoder->env, int64_value); + curr = enif_make_list_cell(decoder->env, val, curr); + + debug_print("VARINT value = %ld\n", int64_value); + 
break; + + case SRL_HDR_ZIGZAG: + debug_print("ZIGZAG, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + ErlNifUInt64 _value = srl_read_varint_int64_nocheck(decoder); + + ErlNifSInt64 value = -( (_value + 1 ) / 2 ); + + val = enif_make_int64(decoder->env, value); + curr = enif_make_list_cell(decoder->env, val, curr); + + break; + + case SRL_HDR_FLOAT: + debug_print("FLOAT, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + float_value = *((float *) &(decoder->buffer[decoder->pos])); + debug_print("FLOAT value = %f\n", float_value); + + decoder->pos += sizeof(float); + + val = enif_make_double(decoder->env, (double) float_value); + curr = enif_make_list_cell(decoder->env, val, curr); + + break; + + case SRL_HDR_DOUBLE: + debug_print("DOUBLE, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + double_value = *((double *) &(decoder->buffer[decoder->pos])); + debug_print("DOUBLE value = %f\n", double_value); + + decoder->pos += sizeof(double); + + val = enif_make_double(decoder->env, (double) double_value); + curr = enif_make_list_cell(decoder->env, val, curr); + + break; + + case SRL_HDR_LONG_DOUBLE: + //TODO: add support + debug_print("LONG DOUBLE (not supported), d->pos = %d\n", decoder->pos); + result = dec_error(decoder, "long double not supported"); + + goto done; + break; + + case SRL_HDR_UNDEF: + debug_print("UNDEF, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + curr = enif_make_list_cell(decoder->env, decoder->atoms->atom_undefined, curr); + + break; + + case SRL_HDR_BINARY: + + debug_print("BINARY, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + int64_value = srl_read_varint_int64_nocheck(decoder); + + val = enif_make_sub_binary(decoder->env, decoder->input, decoder->pos, int64_value); + curr = enif_make_list_cell(env, val, curr); + + 
decoder->pos += int64_value; + + break; + + case SRL_HDR_STR_UTF8: + debug_print("STR_UTF8, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + int64_value = srl_read_varint_int64_nocheck(decoder); + + val = enif_make_sub_binary(decoder->env, decoder->input, decoder->pos, int64_value); + curr = enif_make_list_cell(env, val, curr); + + decoder->pos += int64_value; + + break; + + case SRL_HDR_REFN: + debug_print("REFN - ignored, d->pos = %d\n", decoder->pos); + decoder->pos++; + break; + + case SRL_HDR_REFP: + debug_print("REFP, d->pos = %d\n", decoder->pos); + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + int64_value = (int)srl_read_varint_int64_nocheck(decoder); + debug_print("REFP, MUST LOOK FOR %d\n", (int)int64_value); + + struct reference_struct *ref_s; + + ref_s = find_reference((int)int64_value); + if (ref_s == NULL) { + result = dec_error(decoder, "failed to find ref"); + goto done; + } + debug_print("found the reference!\n"); + ERL_NIF_TERM term_to_duplicate = ref_s->term; + debug_print("P1\n"); + val = enif_make_copy(env, term_to_duplicate); + debug_print("P2\n"); + curr = enif_make_list_cell(env, val, curr); + debug_print("P3\n"); + + break; + + case SRL_HDR_HASH: { + debug_print("HASH d->pos = %d\n", decoder->pos); + + decoder->pos++; + status_stack_pop(decoder, ST_VALUE); + + // create the temp list to store hash elements in it + objs = enif_make_list_cell(env, curr, objs); + +#if SEREAL_MAP_SUPPORT + curr = enif_make_new_map(env); +#else + curr = enif_make_list(env, 0); +#endif + + status_stack_push(decoder, ST_HASH_CLOSE); + + // read the hash length + int64_value = srl_read_varint_int64_nocheck(decoder); + debug_print("HASH: %ld pairs\n", int64_value); + + while (int64_value-- > 0) { + status_stack_push(decoder, ST_HASH_PAIR); + status_stack_push(decoder, ST_VALUE); + status_stack_push(decoder, ST_VALUE); + } + } + break; + + case SRL_HDR_ARRAY: + debug_print("ARRAY, d->pos = %d\n", 
decoder->pos); + + decoder->pos++; + status_stack_pop(decoder, ST_VALUE); + + // create the temp list to store array elements in it + objs = enif_make_list_cell(env, curr, objs); + curr = enif_make_list(env, 0); + + status_stack_push(decoder, ST_ARRAY_CLOSE); + + // read the array length + int64_value = srl_read_varint_int64_nocheck(decoder); + while (int64_value-- > 0) { + status_stack_push(decoder, ST_VALUE); + } + break; + + case SRL_HDR_OBJECT: + // TODO: add support + debug_print("OBJECT, d->pos = %d\n", decoder->pos); + result = dec_error(decoder, "OBJECT not supported"); + break; + + case SRL_HDR_OBJECTV: + // TODO: add support + debug_print("OBJECTV, d->pos = %d\n", decoder->pos); + result = dec_error(decoder, "OBJECTV not supported"); + break; + + case SRL_HDR_ALIAS: + debug_print("ALIAS is handled like COPY, deferring to COPY, d->pos = %d\n", decoder->pos); + + // no break, we handle ALIAS like COPY + + case SRL_HDR_COPY: + debug_print("COPY, d->pos = %d\n", decoder->pos); + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + int64_value = srl_read_varint_int64_nocheck(decoder); + debug_print("COPY, MUST JUMP TO %d + %d\n", decoder->body_pos, (int)int64_value); + + debug_print("THEN WE'LL JUMP TO HERE: %d\n", decoder->pos); + ref_stack_push(decoder, decoder->pos); + + // jump to the refered position + decoder->pos = (int)int64_value + decoder->body_pos; + + // push the fact that we need to read a value, then come back here. 
+ status_stack_push(decoder, ST_JUMP_FROM_ZERO); + status_stack_push(decoder, ST_VALUE); + + break; + + case SRL_HDR_WEAKEN: + debug_print("WEAKEN - ignored, d->pos = %d\n", decoder->pos); + decoder->pos++; + break; + + case SRL_HDR_REGEXP: + // TODO: add support + debug_print("REGEXP, d->pos = %d\n", decoder->pos); + result = dec_error(decoder, "REGEXP not supported"); + break; + + case SRL_HDR_OBJECT_FREEZE: + // TODO: add support + debug_print("OBJECT, d->pos = %d\n", decoder->pos); + result = dec_error(decoder, "OBJECT not supported"); + break; + + case SRL_HDR_OBJECTV_FREEZE: + // TODO: add support + debug_print("OBJECTV, d->pos = %d\n", decoder->pos); + result = dec_error(decoder, "OBJECTV not supported"); + break; + + case SRL_HDR_CANONICAL_UNDEF: + debug_print("CANONICAL UNDEF, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + curr = enif_make_list_cell(decoder->env, decoder->atoms->atom_undefined, curr); + break; + + case SRL_HDR_FALSE: + debug_print("FALSE, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + curr = enif_make_list_cell(decoder->env, decoder->atoms->atom_false, curr); + break; + + case SRL_HDR_TRUE: + debug_print("TRUE, d->pos = %d\n", decoder->pos); + + status_stack_pop(decoder, ST_VALUE); + decoder->pos++; + + curr = enif_make_list_cell(decoder->env, decoder->atoms->atom_true, curr); + break; + + case SRL_HDR_PAD: + debug_print("PAD - ignored, d->pos = %d\n", decoder->pos); + decoder->pos++; + break; + + default: + result = dec_error(decoder, "invalid_sereal"); + goto done; + } + } + break; + + default: + result = dec_error(decoder, "Invalid internal state"); + goto done; + } + + } + + if(dec_current(decoder) != ST_DONE) { + result = dec_error(decoder, "Truncated Sereal"); + + } else { + result = val; + } + +done: + consume_timeslice(env, decoder->pos - start, decoder->bytes_per_iter); + return result; +} + +//TODO: UInt64 isn't a good mapping for 
varint, should find another one
+ErlNifUInt64 srl_read_varint_int64_nocheck(Decoder *decoder) {
+    /* Reads a little-endian base-128 varint starting at decoder->pos and
+     * advances decoder->pos past the bytes consumed. Never reads at or beyond
+     * decoder->len: a varint cut short by the end of the buffer yields the
+     * bits gathered so far. Groups that would land above bit 63 are skipped,
+     * because shifting a 64-bit value by 64 or more is undefined in C. */
+    ErlNifUInt64 result = 0;
+    unsigned lshift = 0;
+    unsigned char byte = 0x80; /* continuation bit set so the loop is entered */
+
+    while ((byte & 0x80) && decoder->pos < decoder->len) {
+        byte = decoder->buffer[decoder->pos++];
+        if (lshift < 64) result |= ((ErlNifUInt64)(byte & 0x7F)) << lshift;
+        lshift += 7;
+    }
+    return result;
+}
+
diff --git a/Erlang/Sereal/decoder/c_src/decoder.h b/Erlang/Sereal/decoder/c_src/decoder.h
new file mode 100644
index 000000000..e196b430d
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/decoder.h
@@ -0,0 +1,11 @@
+#ifndef SEREAL_DECODER_H
+#define SEREAL_DECODER_H
+
+#include "erl_nif.h"
+
+ERL_NIF_TERM decoder_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
+ERL_NIF_TERM decoder_iterate(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);
+
+void decoder_destroy(ErlNifEnv* env, void* obj);
+
+#endif // Included SEREAL_DECODER_H
diff --git a/Erlang/Sereal/decoder/c_src/miniz.c b/Erlang/Sereal/decoder/c_src/miniz.c
new file mode 120000
index 000000000..f94c94261
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/miniz.c
@@ -0,0 +1 @@
+../../../../Perl/shared/miniz.c
\ No newline at end of file
diff --git a/Erlang/Sereal/decoder/c_src/miniz.h b/Erlang/Sereal/decoder/c_src/miniz.h
new file mode 120000
index 000000000..ea9e2cc94
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/miniz.h
@@ -0,0 +1 @@
+../../../../Perl/shared/miniz.h
\ No newline at end of file
diff --git a/Erlang/Sereal/decoder/c_src/snappy b/Erlang/Sereal/decoder/c_src/snappy
new file mode 120000
index 000000000..d934069be
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/snappy
@@ -0,0 +1 @@
+../../../../Perl/shared/snappy/
\ No newline at end of file
diff --git a/Erlang/Sereal/decoder/c_src/srl_protocol.h b/Erlang/Sereal/decoder/c_src/srl_protocol.h
new file mode 120000
index 000000000..cce266162
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/srl_protocol.h
@@ -0,0 +1 @@
+../../../../Perl/shared/srl_protocol.h
\ No newline at end of file
diff --git a/Erlang/Sereal/decoder/c_src/uthash.h b/Erlang/Sereal/decoder/c_src/uthash.h
new file mode 100644
index 000000000..7205c67ef
--- /dev/null
+++ b/Erlang/Sereal/decoder/c_src/uthash.h
@@ -0,0 +1,958 @@
+/*
+Copyright (c) 2003-2014, Troy D. Hanson http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef UTHASH_H
+#define UTHASH_H
+
+#include <string.h>   /* memcmp,strlen */
+#include <stddef.h>   /* ptrdiff_t */
+#include <stdlib.h>   /* exit() */
+
+/* These macros use decltype or the earlier __typeof GNU extension.
+   As decltype is only available in newer compilers (VS2010 or gcc 4.3+
+   when compiling c++ source) this code uses whatever method is needed
+   or, for VS2008 where neither is available, uses casting workarounds. */
+#if defined(_MSC_VER)   /* MS compiler */
+#if _MSC_VER >= 1600 && defined(__cplusplus)  /* VS2010 or newer in C++ mode */
+#define DECLTYPE(x) (decltype(x))
+#else                   /* VS2008 or older (or VS2010 in C mode) */
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#endif
+#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__)
+#define NO_DECLTYPE
+#define DECLTYPE(x)
+#else                   /* GNU, Sun and other compilers */
+#define DECLTYPE(x) (__typeof(x))
+#endif
+
+#ifdef NO_DECLTYPE
+#define DECLTYPE_ASSIGN(dst,src) \
+do { \
+  char **_da_dst = (char**)(&(dst)); \
+  *_da_dst = (char*)(src); \
+} while(0)
+#else
+#define DECLTYPE_ASSIGN(dst,src) \
+do { \
+  (dst) = DECLTYPE(dst)(src); \
+} while(0)
+#endif
+
+/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */
+#if defined (_WIN32)
+#if defined(_MSC_VER) && _MSC_VER >= 1600
+#include <stdint.h>
+#elif defined(__WATCOMC__)
+#include <stdint.h>
+#else
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+#endif
+#else
+#include <stdint.h>
+#endif
+
+#define UTHASH_VERSION 1.9.9
+
+#ifndef uthash_fatal
+#define uthash_fatal(msg) exit(-1)        /* fatal error (out of memory,etc) */
+#endif
+#ifndef uthash_malloc
+#define uthash_malloc(sz) malloc(sz)      /* malloc fcn */
+#endif
+#ifndef uthash_free
+#define uthash_free(ptr,sz) free(ptr)     /* free fcn */
+#endif
+
+#ifndef uthash_noexpand_fyi
+#define uthash_noexpand_fyi(tbl)          /* can be defined to log noexpand */
+#endif
+#ifndef uthash_expand_fyi
+#define uthash_expand_fyi(tbl)            /* can be defined to log expands */
+#endif
+
+/* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS 32      /* initial number of buckets */
+#define HASH_INITIAL_NUM_BUCKETS_LOG2 5  /* lg2 of initial number of buckets */
+#define HASH_BKT_CAPACITY_THRESH 10      /* expand when bucket count reaches */
+
+/* calculate the element whose hash handle address is hhe */
+#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho)))
+
+#define HASH_FIND(hh,head,keyptr,keylen,out) \
+do { \
+  unsigned
_hf_bkt,_hf_hashv; \ + out=NULL; \ + if (head) { \ + HASH_FCN(keyptr,keylen, (head)->hh.tbl->num_buckets, _hf_hashv, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, _hf_hashv)) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], \ + keyptr,keylen,out); \ + } \ + } \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1ULL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8) + ((HASH_BLOOM_BITLEN%8) ? 1:0) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0 +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = 
(UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while(0) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + replaced=NULL; \ + HASH_FIND(hh,head,&((add)->fieldname),keylen_in,replaced); \ + if (replaced!=NULL) { \ + HASH_DELETE(hh,head,replaced); \ + }; \ + HASH_ADD(hh,head,fieldname,keylen_in,add); \ +} while(0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.next = NULL; \ + (add)->hh.key = (char*)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) { \ + head = (add); \ + (head)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh,head); \ + } else { \ + (head)->hh.tbl->tail->next = (add); \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } \ + (head)->hh.tbl->num_items++; \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_FCN(keyptr,keylen_in, (head)->hh.tbl->num_buckets, \ + (add)->hh.hashv, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt],&(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl,(add)->hh.hashv); \ + HASH_EMIT_KEY(hh,head,keyptr,keylen_in); \ + HASH_FSCK(hh,head); \ +} while(0) + +#define HASH_TO_BKT( hashv, num_bkts, bkt ) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1)); \ +} while(0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. 
+ * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + unsigned _hd_bkt; \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev) { \ + ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next) { \ + ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield[0],strlen(add->strfield),add) +#define 
HASH_REPLACE_STR(head,strfield,add,replaced) \ + HASH_REPLACE(hh,head,strfield[0],strlen(add->strfield),add,replaced) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_REPLACE_INT(head,intfield,add,replaced) \ + HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ + HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + unsigned _bkt_i; \ + unsigned _count, _bkt_count; \ + char *_prev; \ + struct UT_hash_handle *_thh; \ + if (head) { \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %d, actual %d\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check 
next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %d, actual %d\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. 
*/
+#define HASH_BER(key,keylen,num_bkts,hashv,bkt) \
+do { \
+  unsigned _hb_keylen=keylen; \
+  char *_hb_key=(char*)(key); \
+  (hashv) = 0; \
+  while (_hb_keylen--)  { (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; } \
+  bkt = (hashv) & (num_bkts-1); \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
+#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
+do { \
+  unsigned _sx_i; \
+  char *_hs_key=(char*)(key); \
+  hashv = 0; \
+  for(_sx_i=0; _sx_i < keylen; _sx_i++) \
+      hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
+  bkt = hashv & (num_bkts-1); \
+} while (0)
+/* FNV-1a: per key octet, XOR it into hashv and then multiply by the FNV prime */
+#define HASH_FNV(key,keylen,num_bkts,hashv,bkt) \
+do { \
+  unsigned _fn_i; \
+  unsigned char *_hf_key=(unsigned char*)(key); \
+  hashv = 2166136261UL; \
+  for(_fn_i=0; _fn_i < keylen; _fn_i++) { \
+      hashv = hashv ^ _hf_key[_fn_i]; \
+      hashv = hashv * 16777619; } \
+  bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
+do { \
+  unsigned _ho_i; \
+  char *_ho_key=(char*)(key); \
+  hashv = 0; \
+  for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
+      hashv += _ho_key[_ho_i]; \
+      hashv += (hashv << 10); \
+      hashv ^= (hashv >> 6); \
+  } \
+  hashv += (hashv << 3); \
+  hashv ^= (hashv >> 11); \
+  hashv += (hashv << 15); \
+  bkt = hashv & (num_bkts-1); \
+} while(0)
+
+#define HASH_JEN_MIX(a,b,c) \
+do { \
+  a -= b; a -= c; a ^= ( c >> 13 ); \
+  b -= c; b -= a; b ^= ( a << 8 ); \
+  c -= a; c -= b; c ^= ( b >> 13 ); \
+  a -= b; a -= c; a ^= ( c >> 12 ); \
+  b -= c; b -= a; b ^= ( a << 16 ); \
+  c -= a; c -= b; c ^= ( b >> 5 ); \
+  a -= b; a -= c; a ^= ( c >> 3 ); \
+  b -= c; b -= a; b ^= ( a << 10 ); \
+  c -= a; c -= b; c ^= ( b >> 15 ); \
+} while (0)
+
+#define HASH_JEN(key,keylen,num_bkts,hashv,bkt) \
+do { \
+  unsigned _hj_i,_hj_j,_hj_k; \
+  unsigned char *_hj_key=(unsigned char*)(key); \
+  hashv = 0xfeedbeef; \
+  _hj_i = _hj_j = 0x9e3779b9; \
+  _hj_k =
(unsigned)(keylen); \ + while (_hj_k >= 12) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12; \ + } \ + hashv += keylen; \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); \ + case 5: _hj_j += _hj_key[4]; \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,num_bkts,hashv,bkt) \ +do { \ + unsigned char *_sfh_key=(unsigned char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = keylen; \ + \ + int _sfh_rem = _sfh_len & 3; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabe; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = (uint32_t)(get16bits 
(_sfh_key+2)) << 11 ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)] << 18); \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + bkt = hashv & (num_bkts-1); \ +} while(0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. 
+ * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) +#define MUR_GETBLOCK(p,i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 0x3) == 1) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 0x3) == 2) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 0x3) == 3) +#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) +#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) +#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) +#else /* assume little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ + (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ + (MUR_PLUS2_ALIGNED(p) ? 
MUR_TWO_TWO(p) : \ + MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ +do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6b; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35l; \ + _h ^= _h >> 16; \ +} while(0) + +#define HASH_MUR(key,keylen,num_bkts,hashv,bkt) \ +do { \ + const uint8_t *_mur_data = (const uint8_t*)(key); \ + const int _mur_nblocks = (keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353; \ + uint32_t _mur_c1 = 0xcc9e2d51; \ + uint32_t _mur_c2 = 0x1b873593; \ + uint32_t _mur_k1 = 0; \ + const uint8_t *_mur_tail; \ + const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+_mur_nblocks*4); \ + int _mur_i; \ + for(_mur_i = -_mur_nblocks; _mur_i; _mur_i++) { \ + _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1,13); \ + _mur_h1 = _mur_h1*5+0xe6546b64; \ + } \ + _mur_tail = (const uint8_t*)(_mur_data + _mur_nblocks*4); \ + _mur_k1=0; \ + switch((keylen) & 3) { \ + case 3: _mur_k1 ^= _mur_tail[2] << 16; \ + case 2: _mur_k1 ^= _mur_tail[1] << 8; \ + case 1: _mur_k1 ^= _mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ + bkt = hashv & (num_bkts-1); \ +} while(0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* key comparison function; return 0 if keys equal */ +#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \ +do { \ + if (head.hh_head) DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,head.hh_head)); \ + else out=NULL; \ + while (out) { \ + if ((out)->hh.keylen == keylen_in) { \ + if ((HASH_KEYCMP((out)->hh.key,keyptr,keylen_in)) == 0) break; \ + } \ + if ((out)->hh.hh_next) 
DECLTYPE_ASSIGN(out,ELMT_FROM_HH(tbl,(out)->hh.hh_next)); \ + else out = NULL; \ + } \ +} while(0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if (head.count >= ((head.expand_mult+1) * HASH_BKT_CAPACITY_THRESH) \ + && (addhh)->tbl->noexpand != 1) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while(0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). 
We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +/* Allocates a zeroed bucket array of twice the current size (uthash_fatal + * is invoked when the allocation fails), relinks every handle into it and + * then frees the old array. Two consecutive expansions that leave more than + * half of the items in nonideal chain positions set tbl->noexpand. */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2 * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1)) + \ + ((tbl->num_items & ((tbl->num_buckets*2)-1)) ? 
1 : 0); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head) _he_newbkt->hh_head->hh_prev = \ + _he_thh; \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1) : 0; \ + if (tbl->ineff_expands > 1) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while(0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +/* Bottom-up merge over the app-order list: runs of _hs_insize elements are + * merged pairwise, _hs_insize doubles after every pass, and the loop stops + * after a pass that needed at most one merge. When cmpfcn(p,q) <= 0 the + * element from the first run is taken. On completion head and tbl->tail + * are updated to the new first and last elements. */ +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? 
\ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) break; \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0) || ((_hs_qsize > 0) && _hs_q )) { \ + if (_hs_psize == 0) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0) || !(_hs_q) ) { \ + _hs_e = _hs_p; \ + if (_hs_p){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail ) { \ + _hs_tail->next = ((_hs_e) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e) { \ + _hs_e->prev = ((_hs_tail) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail){ \ + _hs_tail->next = NULL; \ + } \ + if ( _hs_nmerges <= 1 ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary + * hash handle that must be present in the structure. 
*/ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh) { _last_elt_hh->next = _elt; } \ + if (!dst) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +/* Releases the bucket array, the bloom filter (via HASH_BLOOM_FREE) and the + * table header, then sets head to NULL. No element is freed here. */ +#define HASH_CLEAR(hh,head) \ +do { \ + if (head) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while(0) + +/* bookkeeping bytes used by the hash: one UT_hash_handle per item, the + * bucket array, the table header and HASH_BLOOM_BYTELEN. head is + * dereferenced without a NULL check. */ +#define HASH_OVERHEAD(hh,head) \ + (size_t)((((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + (sizeof(UT_hash_table)) + \ + (HASH_BLOOM_BYTELEN))) + +/* walks the app-order list starting at head; tmp always holds the element + * after el and is read before the loop body runs */ +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) +#else +#define HASH_ITER(hh,head,el,tmp) \ 
+for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \ + el; (el)=(tmp),(tmp)=DECLTYPE(el)((tmp)?(tmp)->hh.next:NULL)) +#endif + +/* obtain a count of items in the hash (0 when head is NULL) */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1 +#define HASH_BLOOM_SIGNATURE 0xb12220f2 + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. 
these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + char bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/Erlang/Sereal/decoder/c_src/utils.c b/Erlang/Sereal/decoder/c_src/utils.c new file mode 120000 index 000000000..1b057c8ef --- /dev/null +++ b/Erlang/Sereal/decoder/c_src/utils.c @@ -0,0 +1 @@ +../../shared/utils.c \ No newline at end of file diff --git a/Erlang/Sereal/decoder/c_src/utils.h b/Erlang/Sereal/decoder/c_src/utils.h new file mode 120000 index 000000000..b92a2ed94 --- /dev/null +++ b/Erlang/Sereal/decoder/c_src/utils.h @@ -0,0 +1 @@ +../../shared/utils.h \ No newline at end of file diff --git a/Erlang/Sereal/decoder/src/decoder.erl b/Erlang/Sereal/decoder/src/decoder.erl new file mode 100644 index 
000000000..5e003ddcf
--- /dev/null
+++ b/Erlang/Sereal/decoder/src/decoder.erl
@@ -0,0 +1,63 @@
+-module(decoder).
+
+-export([deserealize/2]).
+
+-define(NOT_LOADED, not_loaded(?LINE)).
+
+-on_load(init/0).
+
+%% Loads the NIF library when the module is loaded. When code:priv_dir/1
+%% reports an error, falls back to "decoder/priv" next to the ebin directory.
+init() ->
+    PrivDir = case code:priv_dir(?MODULE) of
+        {error, _} ->
+            EbinDir = filename:dirname(code:which(?MODULE)),
+            AppPath = filename:dirname(EbinDir),
+            filename:join(AppPath, "decoder/priv");
+        Path ->
+            Path
+    end,
+    erlang:load_nif(filename:join(PrivDir, ?MODULE), 0).
+
+%% Decodes the Sereal document in Data using the options in Opts and returns
+%% the decoded term. Throws {error, Reason} when the NIF reports a failure.
+-spec deserealize(binary(), list()) -> term().
+deserealize(Data, Opts) when is_binary(Data), is_list(Opts) ->
+    handle_step(nif_decoder_init(Data, Opts), Data).
+
+%% Runs one more NIF iteration over Data and dispatches on its result.
+decode_loop(Data, Decoder, Objs, Curr) ->
+    handle_step(nif_decoder_iterate(Data, Decoder, Objs, Curr), Data).
+
+%% Shared dispatch for the results of nif_decoder_init/2 and
+%% nif_decoder_iterate/4. Data is the payload the NIF call was made with.
+handle_step({error, _} = Error, _Data) ->
+    throw(Error);
+
+handle_step({partial, Decoder, Objs, Curr}, Data) ->
+    decode_loop(Data, Decoder, Objs, Curr);
+
+%% Special case: the iteration only decompressed the payload and
+%% returns the new uncompressed data. Let's use this and re-iterate
+handle_step({partial, Decoder, Objs, Curr, NewData}, _Data) ->
+    decode_loop(NewData, Decoder, Objs, Curr);
+
+%% The C code asks for a module-based data structure to be built; currently
+%% this is used only for the array module.
+handle_step({convert, Decoder, Objs, Curr}, Data) ->
+    A = array:from_list(lists:reverse(Curr)),
+    [Head | Rest] = Objs,
+    decode_loop(Data, Decoder, Rest, [A | Head]);
+
+%% Anything else is the fully decoded term.
+handle_step(ESereal, _Data) ->
+    ESereal.
+
+not_loaded(Line) ->
+    erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).
+
+nif_decoder_init(_, _) ->
+    ?NOT_LOADED.
+
+nif_decoder_iterate(_, _, _, _) ->
+    ?NOT_LOADED.
diff --git a/Erlang/Sereal/encoder/c_src/encoder.c b/Erlang/Sereal/encoder/c_src/encoder.c
new file mode 100644
index 000000000..33d32434f
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/encoder.c
@@ -0,0 +1,865 @@
+#include
+#include
+#include
+#include
+
+#include
+
+#include "erl_nif.h"
+#include "srl_protocol.h"
+
+#include "snappy/csnappy_compress.c"
+
+#include "miniz.h"
+
+#include "utils.h"
+
+#define PARSE_ERROR(MSG) parse_error( sereal_constants, env, MSG, term )
+
+typedef struct {
+    ErlNifBinary buffer;
+    unsigned int index;
+
+    int options;
+
+    int zlib_level;
+    int snappy_level;
+
+    int bytes_per_iteration;
+
+} EncoderData;
+
+static int get_type(ErlNifEnv*, SerealConstants*, ERL_NIF_TERM);
+
+static int init_encoder_data(EncoderData* encoder_data);
+
+static ERL_NIF_TERM parse_options(ErlNifEnv* env, SerealConstants *st, EncoderData* encoder_data, ERL_NIF_TERM options);
+
+static void prepend(EncoderData *encoder_data, char* buffer, int len);
+static void write_byte(EncoderData* encoder_data, unsigned char c);
+static void write_bytes(EncoderData* encoder_data, char cs[]);
+static void write_n_bytes(EncoderData* encoder_data, char *cs, int len);
+static void write_header(ErlNifEnv* env, EncoderData* encoder_data);
+
+static void encode_varint(ErlNifUInt64);
+
+static ERL_NIF_TERM 
zlib_compress(SerealConstants *sereal_constants, ErlNifEnv *env, EncoderData* encoder_data); +static ERL_NIF_TERM snappy_compress(SerealConstants *sereal_constants, ErlNifEnv *env, EncoderData* encoder_data); + +ERL_NIF_TERM srl_encoder_setup(ErlNifEnv* env, int count, const ERL_NIF_TERM arguments[]); +ERL_NIF_TERM srl_encoder_parse(ErlNifEnv* env, int count, const ERL_NIF_TERM arguments[]); + +static ERL_NIF_TERM encoder_finalize(SerealConstants *sereal_constants, ErlNifEnv *env, EncoderData *encoder_data); + +static void encoder_destroy(ErlNifEnv* env, void* obj); + +enum TAGS { + SMALL_POS, + SMALL_NEG, + VARINT, + ZIGZAG, + FLOAT, // not supported + DOUBLE, + LONG_DOUBLE, // not supported + UNDEF, + BINARY, + ATOM, + BOOLEAN, + LIST, + TUPLE, + MAP, + TUPLE_MAP +}; + +#define BUF_SIZE 4096 +static char buffer[BUF_SIZE]; + +static int +load(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + SerealConstants* st = enif_alloc(sizeof(SerealConstants)); + if(st == NULL) { + // no diagnostics? 
+ return 1; + } + + init_sereal_constants(env, st); + st->resource_encoder = enif_open_resource_type ( + env, + NULL, + "encoder", + encoder_destroy, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, + NULL + ); + + *priv = (void*) st; + + return 0; +} + +static int +reload(ErlNifEnv* env, void** priv, ERL_NIF_TERM info) +{ + return 0; +} + +static int +upgrade(ErlNifEnv* env, void** priv, void** old_priv, ERL_NIF_TERM info) +{ + return load(env, priv, info); +} + +static void +unload(ErlNifEnv* env, void* priv) +{ + enif_free(priv); + return; +} + +static ErlNifFunc funcs[] = +{ + {"srl_encoder_setup", 2, srl_encoder_setup}, + {"srl_encoder_parse", 2, srl_encoder_parse}, +}; + +ERL_NIF_INIT(encoder, funcs, &load, &reload, &upgrade, &unload); + +ERL_NIF_TERM srl_encoder_setup(ErlNifEnv* env, int count, const ERL_NIF_TERM arguments[]) { + + debug_print("Starting...\n"); + + SerealConstants* sereal_constants = enif_priv_data(env); + EncoderData *encoder_data = (EncoderData*) enif_alloc_resource( sereal_constants->resource_encoder, + sizeof(EncoderData) ); + if ( encoder_data == NULL ) { + return make_error(sereal_constants, env, "Allocation for EncoderData failed"); + } + + ERL_NIF_TERM error = NULL; + if ( !init_encoder_data(encoder_data) ) { + return make_error(sereal_constants, env, "Initialization for EncoderData failed"); + } + + if ( error = parse_options(env, sereal_constants, encoder_data, arguments[1]) ) { + return error; + } + + ERL_NIF_TERM encoder_resource = enif_make_resource(env, encoder_data); + + ERL_NIF_TERM result = enif_make_tuple3( + env, + sereal_constants->atom_partial, + arguments[0], + encoder_resource + ); + + debug_print("Releasing resource\n"); + enif_release_resource(encoder_data); + + return result; +} + +ERL_NIF_TERM srl_encoder_parse(ErlNifEnv* env, int count, const ERL_NIF_TERM arguments[]) { + + debug_print("Starting...\n"); + + SerealConstants* sereal_constants = enif_priv_data(env); + + ERL_NIF_TERM input = arguments[0]; + ERL_NIF_TERM 
encoder_resource = arguments[1]; + + int input_len = 0; + if ( !enif_get_list_length(env, input, &input_len) ) { + return make_error(sereal_constants, env, "Ill-formed input: failed to get the length"); + } + + EncoderData* encoder_data = NULL; + + if ( !enif_get_resource(env, encoder_resource, sereal_constants->resource_encoder, &encoder_data) ) { + return make_error(sereal_constants, env, "Failed to convert resource to EncoderData"); + } + + int index; + int intValue; + char charValue; + char *charPtr; + double dblValue; + unsigned uintValue; + ErlNifBinary binValue; + ErlNifSInt64 int64Value; + ErlNifUInt64 uint64Value; + + int previous_size = 0; + + while ( input_len-- ) { + + /* 1. Check whether to return to Erlang space */ + int percent = encoder_data->index * 100 / encoder_data->bytes_per_iteration; + if ( enif_consume_timeslice(env, percent) ) { + + debug_print("Yielding the process\n"); + return enif_make_tuple3( + env, + sereal_constants->atom_partial, + input, + encoder_resource + ); + } + + debug_print("%d items to decode\n", input_len + 1); + + ERL_NIF_TERM term; + + /* 2. Fetch next item to encode */ + if ( !enif_get_list_cell(env, input, &term, &input) ){ + goto done; + } + + /* 3. Encode */ + switch (get_type(env, sereal_constants, term)) { + + case SMALL_NEG: + debug_print("matched type = SMALL-NEG\n"); + if ( !enif_get_int(env, term, &intValue) ) { + return PARSE_ERROR( "Failed to extract integer value" ); + } + intValue += 32; + write_byte(encoder_data, (char)intValue); + break; + + case SMALL_POS: + debug_print("matched type = SMALL-POS\n"); + if ( !enif_get_int(env, term, &intValue) ){ + return PARSE_ERROR( "Failed to extract integer value" ); + } + write_byte(encoder_data, (char) intValue); + break; + + case ZIGZAG: + debug_print("matched type = ZIGZAG\n"); + if ( !enif_get_int64(env, term, &int64Value) ){ + return PARSE_ERROR( "Failed to extract integer value" ); + } + + write_byte(encoder_data, SRL_HDR_ZIGZAG); + + /* a. 
convert to zigzag format */ + int64Value = 2 * abs(int64Value) - 1; + + /* b. encode as varint */ + encode_varint(int64Value); + write_bytes(encoder_data, buffer); + + break; + + case VARINT: + debug_print("matched_type = VARINT\n"); + if ( !enif_get_int64(env, term, &int64Value) ){ + return PARSE_ERROR( "Failed to extract integer value" ); + } + + write_byte(encoder_data, SRL_HDR_VARINT); + + encode_varint(int64Value); + write_bytes(encoder_data, buffer); + + break; + + case DOUBLE: + debug_print("matched_type = DOUBLE\n"); + + if ( !enif_get_double(env, term, &dblValue) ) { + return PARSE_ERROR( "Failed to extract double value" ); + } + + write_byte(encoder_data, SRL_HDR_DOUBLE); + + charPtr = &dblValue; + write_n_bytes(encoder_data, charPtr, 8); + + break; + + case UNDEF: + debug_print("matched_type = UNDEF\n"); + write_byte(encoder_data, SRL_HDR_UNDEF); + break; + + case BOOLEAN: + + debug_print("matched_type = BOOLEAN\n"); + + charValue = !enif_compare(term, sereal_constants->atom_true) + ? 
SRL_HDR_TRUE + : SRL_HDR_FALSE; + + write_byte(encoder_data, charValue); + + break; + + case BINARY: + debug_print("matched_type = BINARY\n"); + + if ( !enif_inspect_binary(env, term, &binValue) ) { + return PARSE_ERROR( "Ill-formed binary: failed to read" ); + } + + /* default BINARY tag */ + if ( binValue.size <= 31 ) { + write_byte(encoder_data, binValue.size + SRL_HDR_SHORT_BINARY_LOW); + + } else { + write_byte(encoder_data, SRL_HDR_BINARY); + + /* encode length */ + encode_varint(binValue.size); + write_bytes(encoder_data, buffer); + } + + write_n_bytes(encoder_data, binValue.data, binValue.size); + + break; + + case ATOM: + /* Encode atom as a string */ + debug_print("matched_type = ATOM\n"); + + if ( !enif_get_atom_length(env, term, &uintValue, ERL_NIF_LATIN1) ) { + return PARSE_ERROR( "Ill-formed atom: failed to get length" ); + } + + /* default BINARY tag */ + if ( uintValue <= 31 ) { + write_byte(encoder_data, uintValue + SRL_HDR_SHORT_BINARY_LOW); + + } else { + write_byte(encoder_data, SRL_HDR_BINARY); + + /* encode length */ + encode_varint(uintValue); + write_bytes(encoder_data, buffer); + } + + if ( !enif_get_atom(env, term, buffer, uintValue + 1, ERL_NIF_LATIN1) ) { + return PARSE_ERROR( "Ill-formed atom"); + } + + write_bytes(encoder_data, buffer); + + break; + + case LIST /* ARRAY */ : { + debug_print("matched_type = LIST\n"); + + if ( !enif_get_list_length(env, term, &uintValue) ) { + return PARSE_ERROR( "Ill-formed list: failed to get the length" ); + } + + if ( uintValue <= 15 ) { + /* ARRAY_REF0..15 */ + write_byte(encoder_data, uintValue + SRL_HDR_ARRAYREF_LOW); + + } else { + write_byte(encoder_data, SRL_HDR_REFN); + write_byte(encoder_data, SRL_HDR_ARRAY); + + /* encode length */ + encode_varint(uintValue); + write_bytes(encoder_data, buffer); + } + + /* Add list elements to unparse list of input, but before reverse it */ + enif_make_reverse_list(env, term, &term); + + ERL_NIF_TERM head; + for (index = 0; index < uintValue; index++) { + 
enif_get_list_cell(env, term, &head, &term); + input = enif_make_list_cell(env, head, input); + } + } + break; + + case TUPLE: { + /* we pass tuple(tuple, array) back to erlang to convert it to list, useful for arrays(which are module based) */ + return enif_make_tuple4( + env, + sereal_constants->atom_convert, + input, + encoder_resource, + term + ); + } + break; + +#if SEREAL_MAP_SUPPORT + case MAP: { + debug_print("matched_type = MAP\n"); + + size_t map_size = 0; + if ( !enif_get_map_size(env, term, &map_size) ) { + return PARSE_ERROR( "Ill-formed map: failed to get size" ); + } + + if ( map_size <= 15 ) { + /* HASH_REF0..15 */ + write_byte(encoder_data, map_size + SRL_HDR_HASHREF_LOW); + + } else { + write_byte(encoder_data, SRL_HDR_REFN); + write_byte(encoder_data, SRL_HDR_HASH); + + /* encode length */ + encode_varint(map_size); + write_bytes(encoder_data, buffer); + } + + ErlNifMapIterator iterator; + if ( !enif_map_iterator_create(env, term, &iterator, ERL_NIF_MAP_ITERATOR_HEAD) ) { + return PARSE_ERROR( "Ill-formed map: failed to get iterator" ); + } + + ERL_NIF_TERM key, value; + do { + if ( !enif_map_iterator_get_pair(env, &iterator, &key, &value) ) { + return PARSE_ERROR( "Ill-formed map: failed to get pair" ); + } + + /* add `value`, then `key` to the input stack */ + input = enif_make_list_cell(env, value, input); + input = enif_make_list_cell(env, key, input); + + } while ( enif_map_iterator_next(env, &iterator) ); + } + break; +#endif /* SEREAL_MAP_SUPPORT */ + + case TUPLE_MAP: { + debug_print("matched_type = TUPLE-MAP\n"); + + ERL_NIF_TERM* tuple; + if( !enif_get_tuple(env, term, &intValue, &tuple) ){ + return PARSE_ERROR( "Ill-formed tuple based map" ); + } + + ERL_NIF_TERM pair_list = tuple[0]; + + if ( !enif_get_list_length(env, pair_list, &uintValue) ){ + return PARSE_ERROR( "Ill-formed tuple based map" ); + } + + if ( uintValue <= 15 ) { + /* HASH_REF0..15 */ + write_byte(encoder_data, uintValue + SRL_HDR_HASHREF_LOW); + + } else { + 
write_byte(encoder_data, SRL_HDR_REFN); + write_byte(encoder_data, SRL_HDR_HASH); + + /* encode length */ + encode_varint(uintValue); + write_bytes(encoder_data, buffer); + } + + ERL_NIF_TERM head; + ERL_NIF_TERM tail = pair_list; + + while ( uintValue-- ) { + + if ( !enif_get_list_cell(env, tail, &head, &tail) ) { + return parse_error( sereal_constants, + env, + "Failed to extract map key-value pair", + tail ); + } + + ERL_NIF_TERM* key_value; + if ( !enif_get_tuple(env, head, &index, &key_value) ) { + return parse_error( sereal_constants, + env, + "Wrongly formatted key-value pair", + head ); + } + + /* add `value`, then `key` to the input stack */ + input = enif_make_list_cell(env, key_value[1], input); + input = enif_make_list_cell(env, key_value[0], input); + } + } + break; + + default: + return PARSE_ERROR( "Unknown type to encode" ); + } + + if ( !enif_get_list_length(env, input, &input_len) ) { + return parse_error( sereal_constants, + env, + "Input is expected to be list", + input ); + } + } + +done: + return encoder_finalize(sereal_constants, env, encoder_data); +} + +static ERL_NIF_TERM encoder_finalize(SerealConstants *sereal_constants, ErlNifEnv *env, EncoderData *encoder_data) { + + debug_print("Finalizing encoder\n"); + + ERL_NIF_TERM error; + + if (encoder_data->zlib_level != -1) { + debug_print("Compressing as zlib level: %d\n", encoder_data->zlib_level); + + int uncompressed_length = encoder_data->index; + + if ( (error = zlib_compress(sereal_constants, env, encoder_data)) ) { + return error; + } + + encode_varint(encoder_data->index); + prepend(encoder_data, buffer, strlen(buffer)); + + encode_varint(uncompressed_length); + prepend(encoder_data, buffer, strlen(buffer)); + + } else if ( encoder_data->snappy_level != -1 ) { + debug_print("Compressing as snappy level: %d\n", encoder_data->snappy_level); + + if ( (error = snappy_compress(sereal_constants, env, encoder_data)) ) { + return error; + } + + encode_varint(encoder_data->index); + 
prepend(encoder_data, buffer, strlen(buffer)); + } + + debug_print("Writing header\n"); + write_header(env, encoder_data); + + ERL_NIF_TERM binary = enif_make_binary(env, &encoder_data->buffer); + ERL_NIF_TERM result = enif_make_sub_binary(env, binary, 0, encoder_data->index); + + return result; +} + + +static int get_type(ErlNifEnv *env, SerealConstants *sereal_constants, ERL_NIF_TERM input){ + + int result = -1; + + if (enif_is_number(env, input)) { + + double d; + ErlNifSInt64 number; + + if (enif_get_int64(env, input, &number)){ + + if (number < 0) { + result = number < -16 ? ZIGZAG : SMALL_NEG; + + } else { + result = number <= 15 ? SMALL_POS : VARINT; + } + + } else if (enif_get_double(env, input, &d)){ + result = DOUBLE; + } + + } else if (enif_is_atom(env, input)){ + + if ( !enif_compare(input, sereal_constants->atom_true) + || !enif_compare(input, sereal_constants->atom_false)) { + + result = BOOLEAN; + + } else if ( !enif_compare(input, sereal_constants->atom_undefined) ) { + result = UNDEF; + + } else { + result = ATOM; + } + + } else if (enif_is_binary(env, input)){ + result = BINARY; + + } else if (enif_is_list(env, input)){ + result = LIST; + +#if SEREAL_MAP_SUPPORT + } else if (enif_is_map(env, input)){ + return MAP; +#endif + + } else if (enif_is_tuple(env, input)){ + ERL_NIF_TERM* tuple; + unsigned length = 0; + + enif_get_tuple(env, input, &length, &tuple); + + if (length == 1) { + ERL_NIF_TERM first = tuple[0]; + + result = enif_is_list(env, first) + ? 
TUPLE_MAP + : TUPLE; + + } else { + result = TUPLE; + } + } + + return result; +} + +static inline size_t _max(size_t a, size_t b) { + return (size_t []){a,b}[a < b]; +} + +static void ensure_size(EncoderData* encoder_data, size_t size) { + if (encoder_data->buffer.size < size) { + debug_print("Reallocating binary\n"); + + /* useful for the case when buffer.size << 1 gets negative */ + size_t length = _max(encoder_data->buffer.size << 1, size); + enif_realloc_binary(&encoder_data->buffer, length); + } +} + +static void write_byte(EncoderData* encoder_data, unsigned char c) { + ensure_size(encoder_data, encoder_data->index + 1); + encoder_data->buffer.data[encoder_data->index++] = c; +} + +static void write_bytes(EncoderData* encoder_data, char cs[]) { + int len = strlen(cs); + write_n_bytes(encoder_data, cs, len); +} + +static void write_n_bytes(EncoderData* encoder_data, char *cs, int len) { + ensure_size(encoder_data, encoder_data->index + len); + + int i; + for ( i = 0; i < len; i++ ) { + write_byte(encoder_data, cs[i]); + } +} + +static ERL_NIF_TERM parse_options(ErlNifEnv *env, SerealConstants *sereal_constants, EncoderData* encoder_data, ERL_NIF_TERM options){ + + debug_print("Starting...\n"); + + int length; + + if ( !enif_get_list_length(env, options, &length) ) { + return parse_error( sereal_constants, + env, + "Failed to get options list length", + options ); + } + + debug_print("Parsing options, %d of options\n", length); + + ERL_NIF_TERM head; + ERL_NIF_TERM tail = options; + + int i; + for ( i = 0; i < length; i++ ) { + + debug_print("Extracting head\n"); + if ( !enif_get_list_cell(env, tail, &head, &tail) ) { + return parse_error ( sereal_constants, + env, + "Failed to extract next option", + tail ); + } + + debug_print("Reading next option tuple\n"); + + int arity; + ERL_NIF_TERM *option; + + if ( !enif_get_tuple(env, head, &arity, &option) ) { + return parse_error( sereal_constants, + env, + "Options should be in tuple format", + head ); + } + + if 
( !enif_compare(option[0], sereal_constants->atom_zlib) ) {
+
+            int level = 0;
+            if ( arity == 2 && enif_get_int(env, option[1], &level)
+                 && level >= 0 && level <= 9 ) {
+
+                encoder_data->zlib_level = level;
+
+            } else {
+                return make_error( sereal_constants,
+                                   env,
+                                   "Compression level should be an integer between 0 and 9" );
+            }
+
+        } else if ( !enif_compare(option[0], sereal_constants->atom_snappy) ) {
+
+            if(arity == 1) {
+                encoder_data->snappy_level = 1;
+
+            } else {
+                return make_error( sereal_constants,
+                                   env,
+                                   "Snappy should have no options" );
+            }
+
+        } else {
+            debug_print("Unsupported option\n");
+            return parse_error ( sereal_constants,
+                                 env,
+                                 "Unsupported option",
+                                 option[0] );
+
+        }
+
+
+    }
+
+    return NULL;
+}
+
+/* Insert `len` bytes of `buffer` in front of the bytes encoded so far. */
+static void prepend(EncoderData *encoder_data, char* buffer, int len) {
+
+    /* ensure_size() is defined elsewhere; presumably it grows buffer.data -- TODO confirm */
+    ensure_size(encoder_data, encoder_data->index + len);
+
+    /* move data to the tail; source and destination overlap, so this
+     * has to be memmove rather than memcpy */
+    memmove(encoder_data->buffer.data + len, encoder_data->buffer.data, encoder_data->index);
+
+    memcpy(encoder_data->buffer.data, buffer, len);
+    encoder_data->index += len;
+}
+
+/* Build the header in `buffer` (declared outside this function) and prepend it to the output. */
+static void write_header(ErlNifEnv* env, EncoderData* encoder_data) {
+    debug_print("Writing header\n");
+
+    /* the version/encoding byte directly follows the magic string */
+    const int VERSION_OFFSET = SRL_MAGIC_STRLEN;
+    int HEADER_SIZE = SRL_MAGIC_STRLEN
+                    + 1 /* protocol version */
+                    + 1 /* optional suffix size */;
+
+    debug_print("Writing magic string\n");
+    memcpy(buffer, SRL_MAGIC_STRING_HIGHBIT, SRL_MAGIC_STRLEN);
+
+    debug_print("Writing protocol version\n");
+    buffer[VERSION_OFFSET] = SRL_PROTOCOL_VERSION;
+
+    /* the compression scheme is OR-ed into the version byte */
+    if ( encoder_data->zlib_level != -1 ) {
+        buffer[VERSION_OFFSET] |= SRL_PROTOCOL_ENCODING_ZLIB;
+
+    } else if ( encoder_data->snappy_level != -1 ) {
+        buffer[VERSION_OFFSET] |= SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL;
+    }
+
+    /* optional suffix size */
+    buffer[VERSION_OFFSET + 1] = 0;
+
+    prepend(encoder_data, buffer, HEADER_SIZE);
+}
+
+/* Destructor with an empty body: it releases nothing. NOTE(review): confirm the encoder buffer is freed elsewhere. */
+void encoder_destroy(ErlNifEnv* env, void* obj) {
+}
+
+/* Write `intValue` into `buffer` 7 bits at a time, low bits first, followed by one 0 byte. */
+static void encode_varint(ErlNifUInt64 intValue) {
+    int index = 0;
+    while (intValue > 0x7F) {
+        unsigned char b = (intValue & 0x7F) | 0x80;
+        buffer[index++] = b;
+        intValue >>= 7;
+    }
+
+    buffer[index++] = intValue;
+    buffer[index] = 0;
+}
+
+/* Deflate the encoded bytes and make the result the new output buffer; NULL on success, else an error term. */
+static ERL_NIF_TERM zlib_compress(SerealConstants *sereal_constants, ErlNifEnv *env, EncoderData* encoder_data) {
+    debug_print("Zlib compression\n");
+
+    /* incompressible input can grow, so size the output for the worst case */
+    ErlNifBinary compressed;
+    if ( !enif_alloc_binary(mz_compressBound(encoder_data->index), &compressed) ) {
+        return make_error(sereal_constants, env, "Allocation of compressed buffer failed");
+    }
+
+    int status;
+    mz_ulong length = compressed.size;
+    if ( MZ_OK != (status = compress2( compressed.data,
+                                       &length,
+                                       encoder_data->buffer.data,
+                                       encoder_data->index,
+                                       encoder_data->zlib_level )) ) {
+        /* do not leak the freshly allocated binary on failure */
+        enif_release_binary(&compressed);
+        return make_error(sereal_constants, env, mz_error(status));
+    }
+
+    enif_release_binary(&encoder_data->buffer);
+
+    encoder_data->buffer = compressed;
+    encoder_data->index = length;
+
+    debug_print("Compressed length: %lu\n", (unsigned long) length);
+
+    return NULL;
+}
+
+/* Snappy-compress the encoded bytes and make the result the new output buffer; always returns NULL. */
+static ERL_NIF_TERM snappy_compress(SerealConstants *sereal_constants, ErlNifEnv *env, EncoderData* encoder_data) {
+    debug_print("Snappy compression\n");
+
+    /* csnappy needs room for its worst case, which is larger than the input */
+    ErlNifBinary compressed;
+    if ( !enif_alloc_binary(csnappy_max_compressed_length(encoder_data->index), &compressed) ) {
+        return make_error(sereal_constants, env, "Allocation of compressed buffer failed");
+    }
+
+    uint32_t length = compressed.size;
+    csnappy_compress( encoder_data->buffer.data,
+                      encoder_data->index,
+                      compressed.data,
+                      &length,
+                      buffer, /* NOTE(review): working memory; confirm `buffer` holds at least 2^12 bytes */
+                      12 /* buffer size=2^12 */);
+
+    enif_release_binary(&encoder_data->buffer);
+
+    encoder_data->buffer = compressed;
+    encoder_data->index = length;
+
+    debug_print("Compressed length: %u\n", length);
+
+    return NULL;
+}
+
+static int init_encoder_data(EncoderData* encoder_data){
+
+    if ( !enif_alloc_binary(BUF_SIZE, &encoder_data->buffer) ) {
+        return 0;
+    }
+
+    encoder_data->index = 0;
+    encoder_data->options = 0;
+    encoder_data->zlib_level = encoder_data->snappy_level = -1;
+    encoder_data->bytes_per_iteration =
DEFAULT_BYTES_PER_ITERATION;
+
+    return 1;
+}
+
+
diff --git a/Erlang/Sereal/encoder/c_src/miniz.c b/Erlang/Sereal/encoder/c_src/miniz.c
new file mode 120000
index 000000000..f94c94261
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/miniz.c
@@ -0,0 +1 @@
+../../../../Perl/shared/miniz.c
\ No newline at end of file
diff --git a/Erlang/Sereal/encoder/c_src/miniz.h b/Erlang/Sereal/encoder/c_src/miniz.h
new file mode 120000
index 000000000..ea9e2cc94
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/miniz.h
@@ -0,0 +1 @@
+../../../../Perl/shared/miniz.h
\ No newline at end of file
diff --git a/Erlang/Sereal/encoder/c_src/snappy b/Erlang/Sereal/encoder/c_src/snappy
new file mode 120000
index 000000000..d934069be
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/snappy
@@ -0,0 +1 @@
+../../../../Perl/shared/snappy/
\ No newline at end of file
diff --git a/Erlang/Sereal/encoder/c_src/srl_protocol.h b/Erlang/Sereal/encoder/c_src/srl_protocol.h
new file mode 120000
index 000000000..cce266162
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/srl_protocol.h
@@ -0,0 +1 @@
+../../../../Perl/shared/srl_protocol.h
\ No newline at end of file
diff --git a/Erlang/Sereal/encoder/c_src/utils.c b/Erlang/Sereal/encoder/c_src/utils.c
new file mode 120000
index 000000000..1b057c8ef
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/utils.c
@@ -0,0 +1 @@
+../../shared/utils.c
\ No newline at end of file
diff --git a/Erlang/Sereal/encoder/c_src/utils.h b/Erlang/Sereal/encoder/c_src/utils.h
new file mode 120000
index 000000000..b92a2ed94
--- /dev/null
+++ b/Erlang/Sereal/encoder/c_src/utils.h
@@ -0,0 +1 @@
+../../shared/utils.h
\ No newline at end of file
diff --git a/Erlang/Sereal/encoder/src/encoder.erl b/Erlang/Sereal/encoder/src/encoder.erl
new file mode 100644
index 000000000..c89c406bf
--- /dev/null
+++ b/Erlang/Sereal/encoder/src/encoder.erl
@@ -0,0 +1,67 @@
+-module(encoder).
+
+-export([serealize/2]).
+
+-define(NOT_LOADED, not_loaded(?LINE)).
+
+-on_load(init/0).
+
+%% Loads the NIF shared object when the module is loaded. The library is
+%% looked up in the directory reported by code:priv_dir/1 or, when that
+%% fails, in "encoder/priv" next to the directory holding this module's beam.
+init() ->
+    erlang:load_nif(filename:join(nif_dir(), ?MODULE), 0).
+
+nif_dir() ->
+    case code:priv_dir(?MODULE) of
+        {error, _} ->
+            BeamDir = filename:dirname(code:which(?MODULE)),
+            filename:join(filename:dirname(BeamDir), "encoder/priv");
+        PrivDir ->
+            PrivDir
+    end.
+
+%% Encodes Data with the options in Opts. Error tuples reported by the NIF
+%% are thrown; the final result of the encoding loop is returned unchanged.
+serealize(Data, Opts) ->
+    case srl_encoder_setup([Data], Opts) of
+        {partial, Pending, Encoder} ->
+            encoder_loop(Pending, Encoder);
+        {error, _} = Failure ->
+            throw(Failure);
+        {error, _, _} = Failure ->
+            throw(Failure)
+    end.
+
+%% Calls the NIF repeatedly until it answers with something other than partial / convert.
+encoder_loop(Items, Encoder) ->
+    case srl_encoder_parse(Items, Encoder) of
+        {partial, Rest, Next} ->
+            encoder_loop(Rest, Next);
+        {convert, Rest, Next, Term} ->
+            %% the NIF hands back a term; its list form is queued in front of the rest
+            encoder_loop([term_to_list(Term) | Rest], Next);
+        {error, _} = Failure ->
+            throw(Failure);
+        {error, _, _} = Failure ->
+            throw(Failure);
+        Encoded ->
+            Encoded
+    end.
+
+%% Raised by the NIF stubs below when the shared object has not replaced them.
+not_loaded(Line) ->
+    erlang:nif_error({not_loaded, [{module, ?MODULE}, {line, Line}]}).
+
+srl_encoder_setup(_, _) ->
+    ?NOT_LOADED.
+
+srl_encoder_parse(_, _) ->
+    ?NOT_LOADED.
+
+%% Converts an array (stdlib array module) or a tuple into a list.
+term_to_list(Term) ->
+    case array:is_array(Term) of
+        true -> array:to_list(Term);
+        false -> tuple_to_list(Term)
+    end.
diff --git a/Erlang/Sereal/rebar.config b/Erlang/Sereal/rebar.config
new file mode 100644
index 000000000..0feff4721
--- /dev/null
+++ b/Erlang/Sereal/rebar.config
@@ -0,0 +1,41 @@
+{port_specs, [
+    {"decoder/priv/decoder.so", [
+        "decoder/c_src/*.c"
+    ]},
+    {"encoder/priv/encoder.so", [
+        "encoder/c_src/*.c"
+    ]}
+
+]}.
+%% NOTE(review): -w silences every compiler warning, so -Wall -Werror have no effect below.
+{port_env, [
+    {".*", "CFLAGS", "$CFLAGS -g -w -Wall -Werror -O3 -fno-strict-aliasing"},
+    {".*", "CXXFLAGS", "$CXXFLAGS -g -w -Wall -Werror -O3"},
+
+    {"(linux|solaris|freebsd|netbsd|openbsd|dragonfly|darwin)",
+        "LDFLAGS", "$LDFLAGS -lstdc++"},
+
+    %% OS X Leopard flags for 64-bit
+    {"darwin9.*-64$", "CXXFLAGS", "-m64"},
+    {"darwin9.*-64$", "LDFLAGS", "-arch x86_64"},
+
+    %% OS X Snow Leopard flags for 32-bit
+    {"darwin10.*-32$", "CXXFLAGS", "-m32"},
+    {"darwin10.*-32$", "LDFLAGS", "-arch i386"},
+
+    %% This will merge into basho/rebar/rebar.config eventually
+    {"win32", "CFLAGS", "/Wall /DWIN32 /D_WINDOWS /D_WIN32 /DWINDOWS"},
+    {"win32", "CXXFLAGS", "-g -Wall -O3"}
+]}.
+%% NOTE(review): platform_define normally takes {platform_define, Regex, Macro}; no macro is given here -- confirm intent.
+{erl_opts, [
+    {platform_define, "R1(1|2|3|4|5|6|7)"},
+    {src_dirs, ["decoder/src", "encoder/src", "src"]}
+]}.
+
+{eunit_opts, [
+    verbose,
+    {report, {
+        eunit_surefire, [{dir,"."}]
+    }}
+]}.
diff --git a/Erlang/Sereal/shared/utils.c b/Erlang/Sereal/shared/utils.c
new file mode 100644
index 000000000..b571e1468
--- /dev/null
+++ b/Erlang/Sereal/shared/utils.c
@@ -0,0 +1,104 @@
+// This file is part of Jiffy released under the MIT license.
+// See the LICENSE file for more information.
+
+#include "utils.h"
+#include "erl_nif.h"
+
+#define UNLIMITED (0)
+
+/* Fill `st` with the atoms listed below; the resource type setup is commented out. */
+void init_sereal_constants(ErlNifEnv *env, SerealConstants *st) {
+
+    st->atom_ok = make_atom(env, "ok");
+    st->atom_error = make_atom(env, "error");
+    st->atom_true = make_atom(env, "true");
+    st->atom_false = make_atom(env, "false");
+    st->atom_bignum = make_atom(env, "bignum");
+    st->atom_bignum_e = make_atom(env, "bignum_e");
+    st->atom_bigdbl = make_atom(env, "bigdbl");
+    st->atom_undefined = make_atom(env, "undefined");
+    st->atom_partial = make_atom(env, "partial");
+    st->atom_convert = make_atom(env, "convert");
+    st->atom_zlib = make_atom(env, "zlib");
+    st->atom_snappy = make_atom(env, "snappy");
+    st->atom_bytes_per_iter = make_atom(env, "bytes_per_iter");
+    st->atom_arrayref_to_list = make_atom(env, "arrayref_to_list");
+
+//    st->resource_encoder = enif_open_resource_type (
+//        env,
+//        NULL,
+//        "encoder",
+//        encoder_destroy,
+//        ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER,
+//        NULL
+//    );
+//
+}
+
+/* Return the atom `name`, reusing it when it already exists and creating it otherwise. */
+ERL_NIF_TERM
+make_atom(ErlNifEnv* env, const char* name)
+{
+    ERL_NIF_TERM result;
+
+    if(!enif_make_existing_atom(env, name, &result, ERL_NIF_LATIN1)) {
+        result = enif_make_atom(env, name);
+    }
+
+    return result;
+}
+
+/* Build the tuple {ok, value}. */
+ERL_NIF_TERM
+make_ok(SerealConstants* st, ErlNifEnv* env, ERL_NIF_TERM value)
+{
+    return enif_make_tuple2(env, st->atom_ok, value);
+}
+
+/* Build the tuple {error, Reason}, where Reason is the atom made from `error`. */
+ERL_NIF_TERM
+make_error(SerealConstants* st, ErlNifEnv* env, const char* error)
+{
+    return enif_make_tuple2(env, st->atom_error, make_atom(env, error));
+}
+
+/* Build the tuple {error, Reason, term}, where Reason is the atom made from `error`. */
+ERL_NIF_TERM
+parse_error(SerealConstants* st, ErlNifEnv* env, const char* error, ERL_NIF_TERM term)
+{
+    return enif_make_tuple3(env, st->atom_error, make_atom(env, error), term);
+}
+
+/* Non-zero once `used` has reached `limit`; a limit of UNLIMITED never yields. */
+int
+should_yield(size_t used, size_t limit)
+{
+    return !(limit == UNLIMITED || used < limit);
+}
+
+/*
+ * Report the share of the time slice consumed as used/limit, clamped to 1..100.
+ * Returns 0 without reporting anything when the NIF API is older than 2.4.
+ */
+int
+consume_timeslice(ErlNifEnv* env, size_t used, size_t limit)
+{
+#if (ERL_NIF_MAJOR_VERSION > 2 || (ERL_NIF_MAJOR_VERSION == 2 && ERL_NIF_MINOR_VERSION >= 4))
+    int perc = 1;
+
+    /* a zero limit means "unlimited": skip the division and report the minimum share */
+    if(limit != UNLIMITED) {
+        double ratio = 100.0 * ((double) used / (double) limit);
+        /* clamp while still a double so the conversion to int cannot overflow */
+        if(ratio >= 100.0) {
+            perc = 100;
+        } else if(ratio >= 1.0) {
+            perc = (int) ratio;
+        }
+    }
+
+    return enif_consume_timeslice(env, perc);
+#else
+    return 0;
+#endif
+}
diff --git a/Erlang/Sereal/shared/utils.h b/Erlang/Sereal/shared/utils.h
new file mode 100644
index 000000000..66e715ff7
--- /dev/null
+++ b/Erlang/Sereal/shared/utils.h
@@ -0,0 +1,44 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <stdio.h>   /* fprintf and stderr, used by debug_print */
+#include "erl_nif.h"
+
+/* Trace helper. DEBUG is not defined in this header; the including file has to provide it. */
+#define debug_print(fmt, ...) \
+    do { if (DEBUG) fprintf(stderr, "%s:%d:%s(): " fmt, __FILE__, \
+        __LINE__, __func__, ##__VA_ARGS__); } while (0)
+
+#define DEFAULT_BYTES_PER_ITERATION 4096
+
+/* Atoms created by init_sereal_constants(), plus slots for the two resource types. */
+typedef struct {
+    ERL_NIF_TERM atom_ok;
+    ERL_NIF_TERM atom_error;
+    ERL_NIF_TERM atom_true;
+    ERL_NIF_TERM atom_false;
+    ERL_NIF_TERM atom_bignum;
+    ERL_NIF_TERM atom_bignum_e;
+    ERL_NIF_TERM atom_bigdbl;
+    ERL_NIF_TERM atom_undefined;
+    ERL_NIF_TERM atom_partial;
+    ERL_NIF_TERM atom_convert;
+    ERL_NIF_TERM atom_zlib;
+    ERL_NIF_TERM atom_snappy;
+    ERL_NIF_TERM atom_bytes_per_iter;
+    ERL_NIF_TERM atom_arrayref_to_list;
+
+    ErlNifResourceType* resource_encoder;
+    ErlNifResourceType* resource_decoder;
+
+} SerealConstants;
+
+ERL_NIF_TERM make_atom(ErlNifEnv* env, const char* name);
+ERL_NIF_TERM make_ok(SerealConstants* st, ErlNifEnv* env, ERL_NIF_TERM data);
+ERL_NIF_TERM make_error(SerealConstants* st, ErlNifEnv* env, const char* error);
+ERL_NIF_TERM parse_error(SerealConstants* st, ErlNifEnv* env, const char* error, ERL_NIF_TERM term);
+
+int should_yield(size_t used, size_t limit);
+int consume_timeslice(ErlNifEnv* env, size_t used, size_t limit);
+
+#endif // Included UTILS_H
diff --git a/Erlang/Sereal/src/sereal.app.src b/Erlang/Sereal/src/sereal.app.src
new file mode 100644
index 000000000..e506722a6
--- /dev/null
+++ b/Erlang/Sereal/src/sereal.app.src
@@ -0,0 +1,6 @@
+{application, sereal, [
+    {description, "Sereal [En|De]coder."},
+    {vsn, git},
+    {registered, []},
+    {applications, [kernel, stdlib]}
+]}.
diff --git a/Erlang/Sereal/src/sereal.erl b/Erlang/Sereal/src/sereal.erl
new file mode 100644
index 000000000..44ba8ae2f
--- /dev/null
+++ b/Erlang/Sereal/src/sereal.erl
@@ -0,0 +1,19 @@
+-module(sereal).
+
+-export([encode/1, encode/2, decode/1, decode/2]).
+
+%% @doc Same as encode(Data, []).
+encode(Data) ->
+    encode(Data, []).
+
+%% @doc Encodes Data by delegating to encoder:serealize/2.
+encode(Data, Opts) ->
+    encoder:serealize(Data, Opts).
+
+%% @doc Same as decode(Data, []).
+decode(Data) ->
+    decode(Data, []).
+
+%% @doc Decodes Data by delegating to decoder:deserealize/2.
+decode(Data, Opts) ->
+    decoder:deserealize(Data, Opts).
diff --git a/Erlang/Sereal/test/basic_tests.erl b/Erlang/Sereal/test/basic_tests.erl
new file mode 100644
index 000000000..22a6f5188
--- /dev/null
+++ b/Erlang/Sereal/test/basic_tests.erl
@@ -0,0 +1,57 @@
+-module(basic_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+%% Each *_test/0 below decodes every .srl fixture of one cases directory and
+%% compares the result with the term stored in the matching .eterm file.
+
+all_test() ->
+    Cases = read_cases("all"),
+    [gen(Case) || Case <- Cases].
+
+arrayref_to_list_test() ->
+    Cases = read_cases("arrayref_list"),
+    [gen(Case, [{arrayref_to_list}]) || Case <- Cases].
+
+-ifdef(SEREAL_MAP_SUPPORT).
+
+maps_map_test() ->
+    Cases = read_cases("maps_map"),
+    [gen(Case) || Case <- Cases].
+
+-else.
+
+maps_tuple_test() ->
+    Cases = read_cases("maps_tuple"),
+    [gen(Case) || Case <- Cases].
+
+-endif.
+
+%% Checks one fixture with the default (empty) decoder options.
+gen(Case) ->
+    gen(Case, []).
+
+%% Checks one fixture. An expected {error, _} term must be thrown by the
+%% decoder; anything else must equal the first decoded value. The assertions
+%% are the immediate ?assert* forms: the callers are plain *_test/0
+%% functions, which would never run a lazy ?_assert* test object.
+gen({Name, Srl, {error, _}=Erl}, DecoderOpts) ->
+    {Name, ?assertThrow(Erl, sereal:decode(Srl, DecoderOpts))};
+
+gen({Name, Srl, Erl}, DecoderOpts) ->
+    {ok, Decodeds} = sereal:decode(Srl, DecoderOpts),
+    [Decoded | _ ] = Decodeds,
+    {Name, ?assertEqual(Erl, Decoded)}.
+
+%% Builds {Name, SrlBinary, ExpectedTerm} for every .srl file of a cases directory.
+read_cases(Dirname) ->
+    CasesPath = filename:join(["..", "test", "cases", Dirname, "*.srl"]),
+    FileNames = lists:sort(filelib:wildcard(CasesPath)),
+    lists:map(fun make_pair/1, FileNames).
+
+make_pair(FileName) ->
+    {ok, Srl} = file:read_file(FileName),
+    BaseName = filename:rootname(FileName),
+    ErlFname = BaseName ++ ".eterm",
+    {ok, [Term]} = file:consult(ErlFname),
+    {filename:basename(BaseName), Srl, Term}.
diff --git a/Erlang/Sereal/test/cases/all/test10.eterm b/Erlang/Sereal/test/cases/all/test10.eterm new file mode 100644 index 000000000..d199395b5 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test10.eterm @@ -0,0 +1 @@ +15. diff --git a/Erlang/Sereal/test/cases/all/test10.srl b/Erlang/Sereal/test/cases/all/test10.srl new file mode 100644 index 0000000000000000000000000000000000000000..5803ea8bed63d6ca2d44013dc5116182b009c35d GIT binary patch literal 7 OcmcC5F3MqI;0FK*V*#K5 literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test11.eterm b/Erlang/Sereal/test/cases/all/test11.eterm new file mode 100644 index 000000000..169335a9c --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test11.eterm @@ -0,0 +1 @@ +undefined. diff --git a/Erlang/Sereal/test/cases/all/test11.srl b/Erlang/Sereal/test/cases/all/test11.srl new file mode 100644 index 0000000000000000000000000000000000000000..c93ec11d5485d5043cdbdee0ac966f1211a73778 GIT binary patch literal 7 OcmcC5F3MqIPz3-8c>%Tn literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test12.eterm b/Erlang/Sereal/test/cases/all/test12.eterm new file mode 100644 index 000000000..ea67d24b9 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test12.eterm @@ -0,0 +1 @@ +<<>>. diff --git a/Erlang/Sereal/test/cases/all/test12.srl b/Erlang/Sereal/test/cases/all/test12.srl new file mode 100644 index 0000000000000000000000000000000000000000..bca4fa4069da4fd13537135625ef42d07ac007c6 GIT binary patch literal 7 OcmcC5F3MqINB{r`v;pw| literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test13.eterm b/Erlang/Sereal/test/cases/all/test13.eterm new file mode 100644 index 000000000..75152cf12 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test13.eterm @@ -0,0 +1 @@ +<<"1">>. 
diff --git a/Erlang/Sereal/test/cases/all/test13.srl b/Erlang/Sereal/test/cases/all/test13.srl new file mode 100644 index 0000000000000000000000000000000000000000..0139bdee4b89180f30db1da545cc91a512abe9cb GIT binary patch literal 8 PcmcC5F3MqINHhch3fKZ8 literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test14.eterm b/Erlang/Sereal/test/cases/all/test14.eterm new file mode 100644 index 000000000..58743bf4c --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test14.eterm @@ -0,0 +1 @@ +<<"91a">>. diff --git a/Erlang/Sereal/test/cases/all/test14.srl b/Erlang/Sereal/test/cases/all/test14.srl new file mode 100644 index 0000000000000000000000000000000000000000..59164252b4fa7434cc0a34ade2a565b35a6ca0a1 GIT binary patch literal 10 RcmcC5F3MqINVYUg1ON~L0>1zN literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test15.eterm b/Erlang/Sereal/test/cases/all/test15.eterm new file mode 100644 index 000000000..7d070bbc7 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test15.eterm @@ -0,0 +1 @@ 
+<<"abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabca
bcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc">>. diff --git a/Erlang/Sereal/test/cases/all/test15.srl b/Erlang/Sereal/test/cases/all/test15.srl new file mode 100644 index 0000000000000000000000000000000000000000..526cf21ebdb4e1728223fad124091b4cec18a372 GIT binary patch literal 3009 xcmcC5F3MqIP}?D%n3Oz literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test21.eterm b/Erlang/Sereal/test/cases/all/test21.eterm new file mode 100644 index 000000000..3817ed069 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test21.eterm @@ -0,0 +1 @@ +-200. 
diff --git a/Erlang/Sereal/test/cases/all/test21.srl b/Erlang/Sereal/test/cases/all/test21.srl new file mode 100644 index 0000000000000000000000000000000000000000..db463e6d4bc7477026c987938e5c4ca1586a9a0c GIT binary patch literal 9 QcmcEhT$IDipxDn001*EI#Q*>R literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test22.eterm b/Erlang/Sereal/test/cases/all/test22.eterm new file mode 100644 index 000000000..2dabd8c68 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test22.eterm @@ -0,0 +1 @@ +-1. diff --git a/Erlang/Sereal/test/cases/all/test22.srl b/Erlang/Sereal/test/cases/all/test22.srl new file mode 100644 index 0000000000000000000000000000000000000000..98d5dc0a54acf882cdee748e60e84a97db456f65 GIT binary patch literal 7 OcmcEhT$IDiAP)cwb^>. diff --git a/Erlang/Sereal/test/cases/all/test25.srl b/Erlang/Sereal/test/cases/all/test25.srl new file mode 100644 index 0000000000000000000000000000000000000000..149d3e837e64d0ef158c879d9ec6367ba66028f5 GIT binary patch literal 22 dcmcEhT$E$XAkfQD!Fbx=$IH!)!Q>g69spHA2I2q! literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test26.eterm b/Erlang/Sereal/test/cases/all/test26.eterm new file mode 100644 index 000000000..222287e79 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test26.eterm @@ -0,0 +1,2 @@ +%% snappy compressed srl document +<<"abcabcabcabcabcabcabc">>. diff --git a/Erlang/Sereal/test/cases/all/test26.srl b/Erlang/Sereal/test/cases/all/test26.srl new file mode 100644 index 0000000000000000000000000000000000000000..7846e6331829060e8ba3946baf80f5d5a89b7b4d GIT binary patch literal 16 XcmcEhT$H2Cz$wO4nwXUA#>@Z!D!>Fm literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test6.eterm b/Erlang/Sereal/test/cases/all/test6.eterm new file mode 100644 index 000000000..7a4306928 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test6.eterm @@ -0,0 +1 @@ +-16. 
diff --git a/Erlang/Sereal/test/cases/all/test6.srl b/Erlang/Sereal/test/cases/all/test6.srl new file mode 100644 index 0000000000000000000000000000000000000000..8021a6c458aac794152f62dad724fcbf16354888 GIT binary patch literal 7 OcmcC5F3MqI5C8xOWC5W7 literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test7.eterm b/Erlang/Sereal/test/cases/all/test7.eterm new file mode 100644 index 000000000..2dabd8c68 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test7.eterm @@ -0,0 +1 @@ +-1. diff --git a/Erlang/Sereal/test/cases/all/test7.srl b/Erlang/Sereal/test/cases/all/test7.srl new file mode 100644 index 0000000000000000000000000000000000000000..45311cbceda824df30dd8db9411c5b730a1dd7ca GIT binary patch literal 7 OcmcC5F3MqIkOu$=a{;gb literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test8.eterm b/Erlang/Sereal/test/cases/all/test8.eterm new file mode 100644 index 000000000..17a5757b1 --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test8.eterm @@ -0,0 +1 @@ +0. diff --git a/Erlang/Sereal/test/cases/all/test8.srl b/Erlang/Sereal/test/cases/all/test8.srl new file mode 100644 index 0000000000000000000000000000000000000000..08f1c4d42819485e8fa371b3d7b446805160b5d0 GIT binary patch literal 7 OcmcC5F3MqIU;qFJQ~{9y literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/all/test9.eterm b/Erlang/Sereal/test/cases/all/test9.eterm new file mode 100644 index 000000000..d54c6b66b --- /dev/null +++ b/Erlang/Sereal/test/cases/all/test9.eterm @@ -0,0 +1 @@ +1. diff --git a/Erlang/Sereal/test/cases/all/test9.srl b/Erlang/Sereal/test/cases/all/test9.srl new file mode 100644 index 0000000000000000000000000000000000000000..402a39236e4343fd78f44e4d069323982956b035 GIT binary patch literal 7 OcmcC5F3MqIU<3dNRRNL! 
literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/arrayref_list/test17.eterm b/Erlang/Sereal/test/cases/arrayref_list/test17.eterm new file mode 100644 index 000000000..57afcca04 --- /dev/null +++ b/Erlang/Sereal/test/cases/arrayref_list/test17.eterm @@ -0,0 +1 @@ +[]. diff --git a/Erlang/Sereal/test/cases/arrayref_list/test17.srl b/Erlang/Sereal/test/cases/arrayref_list/test17.srl new file mode 100644 index 0000000000000000000000000000000000000000..987942bc2c739dc20c0034bec14e620040735ff6 GIT binary patch literal 7 OcmcC5F3MqIZ~y=ZlmXEI literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/arrayref_list/test18.eterm b/Erlang/Sereal/test/cases/arrayref_list/test18.eterm new file mode 100644 index 000000000..270d2dbf5 --- /dev/null +++ b/Erlang/Sereal/test/cases/arrayref_list/test18.eterm @@ -0,0 +1 @@ +[1, 2, 3]. diff --git a/Erlang/Sereal/test/cases/arrayref_list/test18.srl b/Erlang/Sereal/test/cases/arrayref_list/test18.srl new file mode 100644 index 0000000000000000000000000000000000000000..f2e963972803f2dde54060dcf56e0f89881ce2e8 GIT binary patch literal 10 RcmcC5F3MqIaAss;1^^Bn0onin literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/arrayref_list/test2.eterm b/Erlang/Sereal/test/cases/arrayref_list/test2.eterm new file mode 100644 index 000000000..f0cdd2282 --- /dev/null +++ b/Erlang/Sereal/test/cases/arrayref_list/test2.eterm @@ -0,0 +1 @@ +[2, [1, <<"foo">>, <<"bar">>]]. 
diff --git a/Erlang/Sereal/test/cases/arrayref_list/test2.srl b/Erlang/Sereal/test/cases/arrayref_list/test2.srl new file mode 100644 index 0000000000000000000000000000000000000000..67c21c87fed58438d08d7a7074486898d71abc16 GIT binary patch literal 20 bcmcEhT$IDi;KZb%&CHmbmY<)Tlvo4+JY)sz literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/arrayref_list/test20.eterm b/Erlang/Sereal/test/cases/arrayref_list/test20.eterm new file mode 100644 index 000000000..2a341390f --- /dev/null +++ b/Erlang/Sereal/test/cases/arrayref_list/test20.eterm @@ -0,0 +1 @@ +[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386
,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886
,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999,1000]. diff --git a/Erlang/Sereal/test/cases/arrayref_list/test20.srl b/Erlang/Sereal/test/cases/arrayref_list/test20.srl new file mode 100644 index 0000000000000000000000000000000000000000..c36fce267e6a3d0aef2f8687730b1c3b9056fc90 GIT binary patch literal 2868 zcmV~$17H~F8Gzw^{AZi*eA~^|&9-ghrn^~}A}P|QOqD7%QoCttrA?jMOln(uo`)WN z0}r+rQS-?1_$NH^Nl$*tQ=i7uc?QqqSv;HP@LZn9^LYU;v@nj@J8Omn|TXwkJTYksG{GLDXNB+d0d4xy#3xDNr{GEUBPyWTf`49i)e>_Ho&_U=Z zbP_rXl|q%!Md&JY6S@o4LJy&*P$TpbdJBDozCu5tzfda-5C#f^gu%iPp-vbo3=@V6 zBZQGcy)a4`EsPPy3gd+F!USQWFiDs!Gze3Kslqg2x-dhSDa;aP3v-0I!aQNVus~QS zGzyD^CSkF#L|7^;6P61rgq6Z7VYRSESSvIO>xA_}i?Bi1C~Oio3tNP(!Zx8**e>i4 zb_%%tA;rf^HRE!+|A3ipKjLWR;n>8Ny4IxCe*mC{A&s&rGjE7eL5rKeJ( z^ip~&eU!dRKc&A?s|-*EDua~4$`GYa8LA9ZhAShKkxIQXN*S$;QN}9cl<~?0Wuh`k znXEJ@Q`-g+LU(Xq;g6*t(;NLD(95*$_3@3a!I+YTv4tn*OcqZ4dteCOS!GwQSK`D zl>17B(ZT3wbTT>{l}44(#pr5uGrAkqMh~N>QDgKndK-O=zD7Tzzfo%pFa{cfjKRhb zqs|y=3^RrsBaD$oy)nudZHzI-8sm)d#sp)cG0B*0G#FEism3&8x-r9;Y0NTa8*_}g z#yn%bvA|epG#ZPHCS$R_%9GnN}GjFrYJW3{oySZg#J>x}hAi?PAjXlybz8(WO6 z#x|qX*lz4Fb{e~k-Nqhcud&bAZyYcV8i$O-#u4MFam+YwoG{vqcH^XR$~bMDG0qz2 zjPu3?&6Y^rg6)-ZQL>L8uyI*MupSC>F9KFIy;q4mD9!P>U49u zJJn7Pr>9fn^m2MTeVo2dKc~M_>kM!PI)j|S&Jd^08R`skhC3sikxsoc${Fp9amG61 zobk>CXQDI7nd~$;Q=F;JG-tXq!MV1X zJ1d-(&MIfMv&LELG&}2@^-hbk!P)3+ayC0#oUP6_r`6f+>~MBEyPVz59%rw!&)M%B za1J_$oWsr$=csecIqsZr+MIUhq;twS?VNGWI_I48&IRY9bIG~vTyd^C*PQFl4d`p;bP75Ll|fa|CFmM-3%Uo@L64wkP!seDdIx=izCpjBe^46? 
z2nGg&g2BO%pe`613=4(_BZ84ZeK0B*9gGRa2IGS9!GvI9Fe#WEGz3$Esll{hdN3oH z8O#c12XlhC!MtF8upn3%GzN=;reJZfBv=|O3zi2ff|bFlV0Ex2SQ|74>w@(`ORyo> z7;FkQ2U~)z!M30^*dFW%b_TnG-NBw%ooSW^gOG9oz}-2KR#dK}FIb>6mm%IwzG$ zRnjHtnsiILC)G)hq-Ro-^h$aseUiROzodUsn+!+>CWDf}$&jQj8JY}Bh9@JEkx6|r zDjA)ONyaAQlJUueWMVQYnVd8vQlDWyeWPY+BS(r2?i;|{f zak3;?nk-9}Co7Vb$*N>^vL;!ZG$-ql^+`*zA=#L0N;W53lC8_~PdyOQ0> zo@8&bFWH|QNDd~4lEcZ7E4iKAN$w{1lKV+Tp+li#p;MuAp|Vg_=u+rf=vL@ns4ny<^eogAdKG#X z`V{&W`W5;YY6}Aj0}F!+g9}3nb%mjYVTIv^5rvV3`ogHf=)#!7*uuEN_`-z3#KNS) zP6t)(&6>,undefined,<<"defg">>]. diff --git a/Erlang/Sereal/test/cases/arrayref_list/test27.srl b/Erlang/Sereal/test/cases/arrayref_list/test27.srl new file mode 100644 index 0000000000000000000000000000000000000000..789c37346dfa9a22f456c773555857b4422ef68c GIT binary patch literal 18 ZcmcEhT$IDi;GB|}l$@fPl9HO14gfu224VmJ literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/arrayref_list/test29.eterm b/Erlang/Sereal/test/cases/arrayref_list/test29.eterm new file mode 100644 index 000000000..2e60d37b4 --- /dev/null +++ b/Erlang/Sereal/test/cases/arrayref_list/test29.eterm @@ -0,0 +1,2 @@ +%% REFP support +[<<"bbb">>,<<"bbb">>]. diff --git a/Erlang/Sereal/test/cases/arrayref_list/test29.srl b/Erlang/Sereal/test/cases/arrayref_list/test29.srl new file mode 100644 index 0000000000000000000000000000000000000000..5d4f5570a0afdf16fceb0f82d9c0f5b77ae2da88 GIT binary patch literal 14 VcmcEhT$IDi;H2?5DJe;l82~3d1hD`B literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/maps_map/test5.eterm b/Erlang/Sereal/test/cases/maps_map/test5.eterm new file mode 100644 index 000000000..70678f63e --- /dev/null +++ b/Erlang/Sereal/test/cases/maps_map/test5.eterm @@ -0,0 +1,14 @@ +{array,2,10,undefined, + { undefined, + #{<<"foo">> => <<"oh wait Im reallllllllllllllllllly long a long binary a lot of stuff there">>}, + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + undefined + } +}. 
+ diff --git a/Erlang/Sereal/test/cases/maps_map/test5.srl b/Erlang/Sereal/test/cases/maps_map/test5.srl new file mode 100644 index 0000000000000000000000000000000000000000..792c61ec73ebb93f68f79cdd3075a067570e6b32 GIT binary patch literal 91 zcmcEhT$IDi;H0Xd#h9FypReYXpP^8mm|3FWnX6EgnwWzLDiw0_^U@U(AyiUkUSd%t Wm{+2ZpQcb;Qks^gP?C{alnMZ+JRoHN literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/maps_tuple/test1.eterm b/Erlang/Sereal/test/cases/maps_tuple/test1.eterm new file mode 100644 index 000000000..767442b1b --- /dev/null +++ b/Erlang/Sereal/test/cases/maps_tuple/test1.eterm @@ -0,0 +1,5 @@ +{array,2,10,undefined, + {{[{<<"a">>,1}]}, + {[{<<"b">>,2}]}, + undefined,undefined,undefined,undefined,undefined,undefined, + undefined,undefined}}. diff --git a/Erlang/Sereal/test/cases/maps_tuple/test1.srl b/Erlang/Sereal/test/cases/maps_tuple/test1.srl new file mode 100644 index 0000000000000000000000000000000000000000..d504c9652d816eca55ced149bad6ebfb3685a3f1 GIT binary patch literal 19 XcmcEhT$IDi;H06&n3%{2B9oW^HE0Ba literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/maps_tuple/test3.eterm b/Erlang/Sereal/test/cases/maps_tuple/test3.eterm new file mode 100644 index 000000000..ad313db91 --- /dev/null +++ b/Erlang/Sereal/test/cases/maps_tuple/test3.eterm @@ -0,0 +1 @@ +{[{<<"foo">>, <<"bar">>}]}. 
diff --git a/Erlang/Sereal/test/cases/maps_tuple/test3.srl b/Erlang/Sereal/test/cases/maps_tuple/test3.srl new file mode 100644 index 0000000000000000000000000000000000000000..f9e752fef94608b5a746f438e39b337fb68fa106 GIT binary patch literal 17 YcmcEhT$IDiprOT>oR*)ToRnAu05SaqqyPW_ literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/cases/maps_tuple/test4.eterm b/Erlang/Sereal/test/cases/maps_tuple/test4.eterm new file mode 100644 index 000000000..cb89f86c0 --- /dev/null +++ b/Erlang/Sereal/test/cases/maps_tuple/test4.eterm @@ -0,0 +1,13 @@ +{[ + {<<"more">>, + {array,5,10,undefined, + {1,2,3,4,5,undefined,undefined,undefined,undefined,undefined}}}, + + {<<"data">>, + {array,2,10,undefined, + {{[{<<"a">>,<<"b">>},{<<"c">>,<<"d">>},{<<"e">>,<<"f">>}]}, + {[{<<"foo">>,4.2}]}, + undefined,undefined,undefined,undefined, + undefined,undefined,undefined,undefined}}} + +]}. diff --git a/Erlang/Sereal/test/cases/maps_tuple/test4.srl b/Erlang/Sereal/test/cases/maps_tuple/test4.srl new file mode 100644 index 0000000000000000000000000000000000000000..ffa1c6b5d63aa23daa75e49fbef12d217860829e GIT binary patch literal 61 zcmcEhT$IDiprOT-l9E`GsG-fIp~ak-nwXZDoS2fBn3$xY#h9FypRauO30@=S_<=y2Qw7%nK9SDUp%@hUQE+ zOiT}qK#Xjbcpl3Vn}(c`FmWxz9_Bq_<@t(7+>q&n^A}x{l89w`RD)SO+pB?8Y?C&X zoLmKVH0`{zc9>ADW{eI&VF+dlsZnn~DV&CG8RJx1##_~NLQ+?-pAZuA{)e1e z%e<*$)GCvyWuDhDnJS~#GWY8ky~>npndv&FTxG`pGb&Dft7FEi%+!A-gHylim?>bm zIW?_ge@+k+$wjG9Eo7HuF zSl31CUHs>_-_CgZ%J%pY7mapK^uD#iH(p-4{_*9^()m8@hA5jY0waPL|cez5#Vn>VnxeVL{IoLOFy-Jv$iKA{y1dPnuUZ{MmQl z(WWa2?QgMosL##=`8eQt#tTlFnvU%Sb8;w3#>$RMHN&!sL3)(99^^+*Lq|Lr?_e>W znk7;Q55YNoF$80y5i{b(8kPxrPQ}fV5!9m;;QlKmrR+LIlBJ$fFtQ^CEjYGvlE5=z zDXHM*D~GEOio!0rQv15Z;b|y#4SDR7QNL-!-A0?4zuMTjMVUWfEWdevfBM_6#uoGs DAWtCu literal 0 HcmV?d00001 diff --git a/Erlang/Sereal/test/looks_like_sereal_test.erl b/Erlang/Sereal/test/looks_like_sereal_test.erl new file mode 100644 index 000000000..121219dbd --- /dev/null +++ b/Erlang/Sereal/test/looks_like_sereal_test.erl @@ -0,0 +1,22 @@ 
+-module(looks_like_sereal_test). + +-include_lib("eunit/include/eunit.hrl"). + +-define(TEST_CASES, + % input error message + [ + { <<"">>, 'Unsupported Sereal versions/protocol' }, + { <<"=srl">>, 'Unsupported Sereal versions/protocol' }, + { <<"=srl\x03\x00\x25">>, 'Unsupported Sereal versions/protocol' }, + { <<"=\xF3rl\x02\x00\x25">>, 'Unsupported Sereal versions/protocol' }, + { <<"=\xF3rl\x00\x00\x25">>, 'Unsupported Sereal versions/protocol' }, + { <<"undefined">>, 'Sereal document encoded in an unknown format' }, + { <<"\x3d\x73\x72\x6c\x02\x00\x0f,\x11\x11">>, 'Wrong structured Sereal' } + ]). + +wrong_formatted_sereals_test() -> + lists:map(fun ({Input, Msg}) -> + ?assertThrow({error, {_, Msg}}, + sereal:decode(Input)) + end, + ?TEST_CASES). diff --git a/Erlang/Sereal/test/round_trip.erl b/Erlang/Sereal/test/round_trip.erl new file mode 100644 index 000000000..3932bc2f9 --- /dev/null +++ b/Erlang/Sereal/test/round_trip.erl @@ -0,0 +1,105 @@ +-module(round_trip). + +-include_lib("eunit/include/eunit.hrl"). + +-define(TEST_CASES, + [ + % numbers + 1, + 0, + 128, + 2147483648, + 4294967296, + 1.111111, + 0.0000001, + 1.25, + -1, + -2, + -191, + + % atoms + a, + abcd, + aabbccddeeffgghhii, + zyxwvutsrqpon, + + % binaries + <<"">>, + <<"1">>, + <<"\x45, \x52, \x4c, \x41, \x4E, \x47">>, + <<"quick fox jumps over the lazy dog">> + + ]). + +-define(LIST_CASES, + [ + [], + [1], + [1,2,3], + [[], []], + [[1], [2], [3]], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + ]). + +decode_encode(A) -> + decode_encode(A, []). + +decode_encode(A, DecodeOptions) -> + decode_encode(A, DecodeOptions, []). + +decode_encode(A, DecodeOptions, EncodeOptions) -> + {ok, [Result]} = sereal:decode(sereal:encode(A, EncodeOptions), DecodeOptions), + Result. + +array_to_list(A) -> + case array:is_array(A) of + true -> array:to_list(array:map(fun (_, E) -> array_to_list(E) end, A)); + _ -> A + end. + +test_elem(A) -> + test_elem(A, []). 
+ +test_elem(A, EncoderOptions) -> + Z = decode_encode(A, [], EncoderOptions), + B = array_to_list(Z), + ?assertEqual(A, B). + +list_test() -> + [ test_elem(Case) || Case <- ?LIST_CASES ], + [ test_elem(Case, [{snappy}]) || Case <- ?LIST_CASES ], + [ test_elem(Case, [{zlib, 9}]) || Case <- ?LIST_CASES ], + [ Case = decode_encode(Case, [{arrayref_to_list}]) || Case <- ?LIST_CASES ], + [ Case = decode_encode(Case, [{arrayref_to_list}], [{snappy}]) || Case <- ?LIST_CASES ], + [ Case = decode_encode(Case, [{arrayref_to_list}], [{zlib, 9}]) || Case <- ?LIST_CASES ]. + +atom_to_binary(A) -> + list_to_binary(atom_to_list(A)). + +test_case( Expected ) -> + test_case(Expected, []). + +test_case( Expected, EncodeOptions ) -> + if is_atom(Expected) -> ?assertEqual(atom_to_binary(Expected), decode_encode(Expected, [], EncodeOptions)); + true -> ?assertEqual(Expected, decode_encode(Expected, [], EncodeOptions)) + end. + +basic_test() -> + [test_case(Case) || Case <- ?TEST_CASES], + [test_case(Case, [{snappy}]) || Case <- ?TEST_CASES], + [test_case(Case, [{zlib, 9}]) || Case <- ?TEST_CASES]. + + +-ifdef(SEREAL_MAP_SUPPORT). + +-define(MAP_CASES, [ + #{<<"a">> => 1}, + #{<<"a">> => 1, <<"b">> => 2}, + #{<<"a">> => #{<<"aa">> => #{<<"aaa">> => 123}}}, + #{<<"a">> => 1, <<"b">> => 2, <<"c">> => 3, <<"d">> => 4, <<"e">> => 5, <<"f">> => 6, <<"g">> => 7, <<"h">> => 8, <<"i">> => 9, <<"j">> => 10, <<"k">> => 11, <<"l">> => 12, <<"m">> => 13, <<"n">> => 14, <<"o">> => 15, <<"p">> => 16, <<"q">> => 17, <<"r">> => 18, <<"s">> => 19, <<"t">> => 20, <<"u">> => 21, <<"v">> => 22, <<"w">> => 23, <<"x">> => 24, <<"y">> => 25, <<"z">> => 26, <<"a1">> => 27, <<"a2">> => 28, <<"a3">> => 29, <<"a4">> => 30, <<"a5">> => 31, <<"a6">> => 32, <<"a7">> => 33 } + ]). + +maps_test() -> + [ test_case(Case) || Case <- ?MAP_CASES ]. + +-endif.