From cd0020e7ab56f06d68f9039993e652140060fa5f Mon Sep 17 00:00:00 2001 From: Jonathan Worthington Date: Thu, 16 Aug 2018 18:53:18 +0200 Subject: [PATCH] Re-instate use of int cache in fast boxing For JIT, we now emit assembly that does the comparisons and then does a fetch from the integer cache if applicable. Thus the only real call is to obtain memory from the GC if we need to box, the value then being poked into the object at the appropriate location. Also stub in as-yet unimplemented and unused spesh ops that we'll use for optimizing boxing to a P6bigint embedded in a P6opaque. --- lib/MAST/Ops.nqp | 126 +++++++++++++++++++++++++-------------- src/6model/reprs/P6int.c | 30 +++++----- src/core/intcache.c | 14 +++++ src/core/intcache.h | 1 + src/core/interp.c | 49 +++++++++++++++ src/core/oplabels.h | 8 +-- src/core/oplist | 11 ++++ src/core/ops.c | 62 ++++++++++++++++++- src/core/ops.h | 52 ++++++++-------- src/jit/graph.c | 3 + src/jit/x64/emit.dasc | 60 ++++++++++++++----- 11 files changed, 313 insertions(+), 103 deletions(-) diff --git a/lib/MAST/Ops.nqp b/lib/MAST/Ops.nqp index 561d3155f2..b7c70c12d5 100644 --- a/lib/MAST/Ops.nqp +++ b/lib/MAST/Ops.nqp @@ -864,29 +864,33 @@ BEGIN { 2182, 2185, 2188, - 2191, - 2194, - 2197, - 2200, + 2193, + 2198, 2204, - 2208, - 2211, - 2214, - 2215, - 2217, + 2210, + 2213, + 2216, 2219, - 2221, - 2225, - 2227, - 2229, - 2229, - 2229, + 2222, + 2226, 2230, - 2231, - 2231, - 2232, - 2234, - 2238); + 2233, + 2236, + 2237, + 2239, + 2241, + 2243, + 2247, + 2249, + 2251, + 2251, + 2251, + 2252, + 2253, + 2253, + 2254, + 2256, + 2260); MAST::Ops.WHO<@counts> := nqp::list_i(0, 2, 2, @@ -1747,6 +1751,10 @@ BEGIN { 3, 3, 3, + 5, + 5, + 6, + 6, 3, 3, 3, @@ -3959,6 +3967,28 @@ BEGIN { 65, 16, 57, + 66, + 16, + 128, + 16, + 33, + 66, + 16, + 128, + 16, + 33, + 66, + 16, + 128, + 16, + 33, + 16, + 66, + 16, + 128, + 16, + 33, + 16, 34, 65, 16, @@ -4871,30 +4901,34 @@ BEGIN { 'sp_p6obind_i', 857, 'sp_p6obind_n', 858, 'sp_p6obind_s', 859, 
- 'sp_deref_get_i64', 860, - 'sp_deref_get_n', 861, - 'sp_deref_bind_i64', 862, - 'sp_deref_bind_n', 863, - 'sp_getlexvia_o', 864, - 'sp_getlexvia_ins', 865, - 'sp_getstringfrom', 866, - 'sp_getwvalfrom', 867, - 'sp_jit_enter', 868, - 'sp_boolify_iter', 869, - 'sp_boolify_iter_arr', 870, - 'sp_boolify_iter_hash', 871, - 'sp_cas_o', 872, - 'sp_atomicload_o', 873, - 'sp_atomicstore_o', 874, - 'prof_enter', 875, - 'prof_enterspesh', 876, - 'prof_enterinline', 877, - 'prof_enternative', 878, - 'prof_exit', 879, - 'prof_allocated', 880, - 'ctw_check', 881, - 'coverage_log', 882, - 'breakpoint', 883); + 'sp_fastbox_i', 860, + 'sp_fastbox_bi', 861, + 'sp_fastbox_i_ic', 862, + 'sp_fastbox_bi_ic', 863, + 'sp_deref_get_i64', 864, + 'sp_deref_get_n', 865, + 'sp_deref_bind_i64', 866, + 'sp_deref_bind_n', 867, + 'sp_getlexvia_o', 868, + 'sp_getlexvia_ins', 869, + 'sp_getstringfrom', 870, + 'sp_getwvalfrom', 871, + 'sp_jit_enter', 872, + 'sp_boolify_iter', 873, + 'sp_boolify_iter_arr', 874, + 'sp_boolify_iter_hash', 875, + 'sp_cas_o', 876, + 'sp_atomicload_o', 877, + 'sp_atomicstore_o', 878, + 'prof_enter', 879, + 'prof_enterspesh', 880, + 'prof_enterinline', 881, + 'prof_enternative', 882, + 'prof_exit', 883, + 'prof_allocated', 884, + 'ctw_check', 885, + 'coverage_log', 886, + 'breakpoint', 887); MAST::Ops.WHO<@names> := nqp::list_s('no_op', 'const_i8', 'const_i16', @@ -5755,6 +5789,10 @@ BEGIN { 'sp_p6obind_i', 'sp_p6obind_n', 'sp_p6obind_s', + 'sp_fastbox_i', + 'sp_fastbox_bi', + 'sp_fastbox_i_ic', + 'sp_fastbox_bi_ic', 'sp_deref_get_i64', 'sp_deref_get_n', 'sp_deref_bind_i64', diff --git a/src/6model/reprs/P6int.c b/src/6model/reprs/P6int.c index 0f0c0ca4b6..d9d03b6ce9 100644 --- a/src/6model/reprs/P6int.c +++ b/src/6model/reprs/P6int.c @@ -204,25 +204,23 @@ static void spesh(MVMThreadContext *tc, MVMSTable *st, MVMSpeshGraph *g, MVMSpes switch (ins->info->opcode) { case MVM_OP_box_i: { if (repr_data->bits == 64 && !(st->mode_flags & MVM_FINALIZE_TYPE)) { - /* Prepend a 
fastcreate instruction. */ - MVMSpeshIns *fastcreate = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); + /* Turn into a sp_fastbox_i[_ic] instruction. */ + MVMint32 int_cache_type_idx = MVM_intcache_type_index(tc, st->WHAT); MVMSpeshFacts *tgt_facts = MVM_spesh_get_facts(tc, g, ins->operands[0]); - fastcreate->info = MVM_op_get_op(MVM_OP_sp_fastcreate); - fastcreate->operands = MVM_spesh_alloc(tc, g, 3 * sizeof(MVMSpeshOperand)); - fastcreate->operands[0] = ins->operands[0]; - tgt_facts->writer = fastcreate; - fastcreate->operands[1].lit_i16 = st->size; - fastcreate->operands[2].lit_i16 = MVM_spesh_add_spesh_slot(tc, g, (MVMCollectable *)st); - MVM_spesh_manipulate_insert_ins(tc, bb, ins->prev, fastcreate); + MVMSpeshOperand *orig_operands = ins->operands; + ins->info = MVM_op_get_op(int_cache_type_idx < 0 + ? MVM_OP_sp_fastbox_i + : MVM_OP_sp_fastbox_i_ic); + ins->operands = MVM_spesh_alloc(tc, g, 6 * sizeof(MVMSpeshOperand)); + ins->operands[0] = orig_operands[0]; + ins->operands[1].lit_i16 = st->size; + ins->operands[2].lit_i16 = MVM_spesh_add_spesh_slot(tc, g, (MVMCollectable *)st); + ins->operands[3].lit_i16 = offsetof(MVMP6int, body.value); + ins->operands[4] = orig_operands[1]; + ins->operands[5].lit_i16 = (MVMint16)int_cache_type_idx; + MVM_spesh_usages_delete_by_reg(tc, g, orig_operands[2], ins); tgt_facts->flags |= MVM_SPESH_FACT_KNOWN_TYPE | MVM_SPESH_FACT_CONCRETE; tgt_facts->type = st->WHAT; - - /* Change instruction to a bind. 
*/ - MVM_spesh_usages_delete_by_reg(tc, g, ins->operands[2], ins); - ins->info = MVM_op_get_op(MVM_OP_sp_bind_i64); - ins->operands[2] = ins->operands[1]; - ins->operands[1].lit_i16 = offsetof(MVMP6int, body.value); - MVM_spesh_usages_add_by_reg(tc, g, ins->operands[0], ins); } break; } diff --git a/src/core/intcache.c b/src/core/intcache.c index 987a86c3bc..89f033ee6b 100644 --- a/src/core/intcache.c +++ b/src/core/intcache.c @@ -51,3 +51,17 @@ MVMObject *MVM_intcache_get(MVMThreadContext *tc, MVMObject *type, MVMint64 valu } return NULL; } + +/* Returns the index of the int cache entry whose type is the given type, or -1 if no entry matches. */ +MVMint32 MVM_intcache_type_index(MVMThreadContext *tc, MVMObject *type) { + MVMint32 result = -1; + int slot = 0; + uv_mutex_lock(&tc->instance->mutex_int_const_cache); + while (slot < 4 && result < 0) { + if (tc->instance->int_const_cache->types[slot] == type) + result = slot; + slot++; + } + uv_mutex_unlock(&tc->instance->mutex_int_const_cache); + return result; +} diff --git a/src/core/intcache.h b/src/core/intcache.h index 100197d84d..762a023105 100644 --- a/src/core/intcache.h +++ b/src/core/intcache.h @@ -5,3 +5,4 @@ struct MVMIntConstCache { void MVM_intcache_for(MVMThreadContext *tc, MVMObject *type); MVMObject *MVM_intcache_get(MVMThreadContext *tc, MVMObject *type, MVMint64 value); +MVMint32 MVM_intcache_type_index(MVMThreadContext *tc, MVMObject *type); diff --git a/src/core/interp.c b/src/core/interp.c index 809db437f0..e0465c8e9f 100644 --- a/src/core/interp.c +++ b/src/core/interp.c @@ -5790,6 +5790,55 @@ void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContex cur_op += 6; goto NEXT; } + OP(sp_fastbox_i): { + MVMuint16 size = GET_UI16(cur_op, 2); + MVMObject *obj = MVM_gc_allocate_nursery(tc, size); +#if MVM_GC_DEBUG + if (tc->allocate_in_gen2) + MVM_panic(1, "Illegal use of sp_fastbox_i when gen2 allocation flag set"); +#endif + obj->st = (MVMSTable *)tc->cur_frame->effective_spesh_slots[GET_UI16(cur_op, 4)]; + obj->header.size = size; + obj->header.owner 
= tc->thread_id; + *((MVMint64 *)((char *)obj + GET_UI16(cur_op, 6))) = GET_REG(cur_op, 8).i64; + GET_REG(cur_op, 0).o = obj; + cur_op += 10; + goto NEXT; + } + OP(sp_fastbox_bi): { + /* NYI stub; operands: w(obj) int16 sslot int16 r(int64) :pure (5 operands, 10 bytes) */ + MVM_panic(1, "sp_fastbox_bi NYI"); + cur_op += 10; + goto NEXT; + } + OP(sp_fastbox_i_ic): { + MVMint64 value = GET_REG(cur_op, 8).i64; + if (value >= -1 && value < 15) { /* in the int cache range: hand out the cached object */ + MVMint16 slot = GET_UI16(cur_op, 10); + GET_REG(cur_op, 0).o = tc->instance->int_const_cache->cache[slot][value + 1]; /* entry 0 holds -1 */ + } + else { /* outside the cache range: allocate and fill a fresh object */ + MVMuint16 size = GET_UI16(cur_op, 2); + MVMObject *obj = MVM_gc_allocate_nursery(tc, size); +#if MVM_GC_DEBUG + if (tc->allocate_in_gen2) + MVM_panic(1, "Illegal use of sp_fastbox_i_ic when gen2 allocation flag set"); +#endif + obj->st = (MVMSTable *)tc->cur_frame->effective_spesh_slots[GET_UI16(cur_op, 4)]; + obj->header.size = size; + obj->header.owner = tc->thread_id; + *((MVMint64 *)((char *)obj + GET_UI16(cur_op, 6))) = value; + GET_REG(cur_op, 0).o = obj; + } + cur_op += 12; + goto NEXT; + } + OP(sp_fastbox_bi_ic): { + /* NYI stub; operands: w(obj) int16 sslot int16 r(int64) int16 :pure (6 operands, 12 bytes) */ + MVM_panic(1, "sp_fastbox_bi_ic NYI"); + cur_op += 12; + goto NEXT; + } OP(sp_deref_get_i64): { MVMObject *o = GET_REG(cur_op, 2).o; MVMint64 **target = ((MVMint64 **)((char *)o + GET_UI16(cur_op, 4))); diff --git a/src/core/oplabels.h b/src/core/oplabels.h index 766053fa3c..08fcf76da0 100644 --- a/src/core/oplabels.h +++ b/src/core/oplabels.h @@ -861,6 +861,10 @@ static const void * const LABELS[] = { &&OP_sp_p6obind_i, &&OP_sp_p6obind_n, &&OP_sp_p6obind_s, + &&OP_sp_fastbox_i, + &&OP_sp_fastbox_bi, + &&OP_sp_fastbox_i_ic, + &&OP_sp_fastbox_bi_ic, &&OP_sp_deref_get_i64, &&OP_sp_deref_get_n, &&OP_sp_deref_bind_i64, @@ -1021,10 +1025,6 @@ static const void * const LABELS[] = { NULL, NULL, NULL, - NULL, - NULL, - NULL, - NULL, &&OP_CALL_EXTOP, &&OP_CALL_EXTOP, &&OP_CALL_EXTOP, diff --git a/src/core/oplist b/src/core/oplist index 155855d8aa..2866cce4af 100644 --- a/src/core/oplist +++ 
b/src/core/oplist @@ -973,6 +973,17 @@ sp_p6obind_i .s r(obj) int16 r(int64) sp_p6obind_n .s r(obj) int16 r(num64) sp_p6obind_s .s r(obj) int16 r(str) +# Lowered int and bigint box ops. These combine a fastcreate and a writing of +# the value as appropriate. The first two read operands have the same meaning +# as in sp_fastcreate, the third is the offset in the object to box to, and +# the final operand is the integer to box. The _ic variants include an extra +# argument which is the type index in the integer cache; they do a lookup in +# the integer cache before performing a box operation. +sp_fastbox_i .s w(obj) int16 sslot int16 r(int64) :pure +sp_fastbox_bi .s w(obj) int16 sslot int16 r(int64) :pure +sp_fastbox_i_ic .s w(obj) int16 sslot int16 r(int64) int16 :pure +sp_fastbox_bi_ic .s w(obj) int16 sslot int16 r(int64) int16 :pure + # Follow a pointer at an offset to an object and get/store a value there. sp_deref_get_i64 .s w(int64) r(obj) int16 :pure sp_deref_get_n .s w(num64) r(obj) int16 :pure diff --git a/src/core/ops.c b/src/core/ops.c index f2c40199c6..6597d4f504 100644 --- a/src/core/ops.c +++ b/src/core/ops.c @@ -12892,6 +12892,66 @@ static const MVMOpInfo MVM_op_infos[] = { 0, { MVM_operand_read_reg | MVM_operand_obj, MVM_operand_int16, MVM_operand_read_reg | MVM_operand_str } }, + { + MVM_OP_sp_fastbox_i, + "sp_fastbox_i", + ".s", + 5, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + { MVM_operand_write_reg | MVM_operand_obj, MVM_operand_int16, MVM_operand_spesh_slot, MVM_operand_int16, MVM_operand_read_reg | MVM_operand_int64 } + }, + { + MVM_OP_sp_fastbox_bi, + "sp_fastbox_bi", + ".s", + 5, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + { MVM_operand_write_reg | MVM_operand_obj, MVM_operand_int16, MVM_operand_spesh_slot, MVM_operand_int16, MVM_operand_read_reg | MVM_operand_int64 } + }, + { + MVM_OP_sp_fastbox_i_ic, + "sp_fastbox_i_ic", + ".s", + 6, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + { MVM_operand_write_reg | MVM_operand_obj, MVM_operand_int16, 
MVM_operand_spesh_slot, MVM_operand_int16, MVM_operand_read_reg | MVM_operand_int64, MVM_operand_int16 } + }, + { + MVM_OP_sp_fastbox_bi_ic, + "sp_fastbox_bi_ic", + ".s", + 6, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + { MVM_operand_write_reg | MVM_operand_obj, MVM_operand_int16, MVM_operand_spesh_slot, MVM_operand_int16, MVM_operand_read_reg | MVM_operand_int64, MVM_operand_int16 } + }, { MVM_OP_sp_deref_get_i64, "sp_deref_get_i64", @@ -13251,7 +13311,7 @@ static const MVMOpInfo MVM_op_infos[] = { }, }; -static const unsigned short MVM_op_counts = 884; +static const unsigned short MVM_op_counts = 888; MVM_PUBLIC const MVMOpInfo * MVM_op_get_op(unsigned short op) { if (op >= MVM_op_counts) diff --git a/src/core/ops.h b/src/core/ops.h index 6ea3242beb..0e3b639bb9 100644 --- a/src/core/ops.h +++ b/src/core/ops.h @@ -861,30 +861,34 @@ #define MVM_OP_sp_p6obind_i 857 #define MVM_OP_sp_p6obind_n 858 #define MVM_OP_sp_p6obind_s 859 -#define MVM_OP_sp_deref_get_i64 860 -#define MVM_OP_sp_deref_get_n 861 -#define MVM_OP_sp_deref_bind_i64 862 -#define MVM_OP_sp_deref_bind_n 863 -#define MVM_OP_sp_getlexvia_o 864 -#define MVM_OP_sp_getlexvia_ins 865 -#define MVM_OP_sp_getstringfrom 866 -#define MVM_OP_sp_getwvalfrom 867 -#define MVM_OP_sp_jit_enter 868 -#define MVM_OP_sp_boolify_iter 869 -#define MVM_OP_sp_boolify_iter_arr 870 -#define MVM_OP_sp_boolify_iter_hash 871 -#define MVM_OP_sp_cas_o 872 -#define MVM_OP_sp_atomicload_o 873 -#define MVM_OP_sp_atomicstore_o 874 -#define MVM_OP_prof_enter 875 -#define MVM_OP_prof_enterspesh 876 -#define MVM_OP_prof_enterinline 877 -#define MVM_OP_prof_enternative 878 -#define MVM_OP_prof_exit 879 -#define MVM_OP_prof_allocated 880 -#define MVM_OP_ctw_check 881 -#define MVM_OP_coverage_log 882 -#define MVM_OP_breakpoint 883 +#define MVM_OP_sp_fastbox_i 860 +#define MVM_OP_sp_fastbox_bi 861 +#define MVM_OP_sp_fastbox_i_ic 862 +#define MVM_OP_sp_fastbox_bi_ic 863 +#define MVM_OP_sp_deref_get_i64 864 +#define MVM_OP_sp_deref_get_n 865 
+#define MVM_OP_sp_deref_bind_i64 866 +#define MVM_OP_sp_deref_bind_n 867 +#define MVM_OP_sp_getlexvia_o 868 +#define MVM_OP_sp_getlexvia_ins 869 +#define MVM_OP_sp_getstringfrom 870 +#define MVM_OP_sp_getwvalfrom 871 +#define MVM_OP_sp_jit_enter 872 +#define MVM_OP_sp_boolify_iter 873 +#define MVM_OP_sp_boolify_iter_arr 874 +#define MVM_OP_sp_boolify_iter_hash 875 +#define MVM_OP_sp_cas_o 876 +#define MVM_OP_sp_atomicload_o 877 +#define MVM_OP_sp_atomicstore_o 878 +#define MVM_OP_prof_enter 879 +#define MVM_OP_prof_enterspesh 880 +#define MVM_OP_prof_enterinline 881 +#define MVM_OP_prof_enternative 882 +#define MVM_OP_prof_exit 883 +#define MVM_OP_prof_allocated 884 +#define MVM_OP_ctw_check 885 +#define MVM_OP_coverage_log 886 +#define MVM_OP_breakpoint 887 #define MVM_OP_EXT_BASE 1024 #define MVM_OP_EXT_CU_LIMIT 1024 diff --git a/src/jit/graph.c b/src/jit/graph.c index 42e030cbc6..a48baf236b 100644 --- a/src/jit/graph.c +++ b/src/jit/graph.c @@ -1823,6 +1823,9 @@ static MVMint32 consume_ins(MVMThreadContext *tc, MVMJitGraph *jg, case MVM_OP_sp_cas_o: case MVM_OP_sp_atomicload_o: case MVM_OP_sp_atomicstore_o: + /* Specialized boxings */ + case MVM_OP_sp_fastbox_i: + case MVM_OP_sp_fastbox_i_ic: jg_append_primitive(tc, jg, ins); break; /* Unspecialized parameter access */ diff --git a/src/jit/x64/emit.dasc b/src/jit/x64/emit.dasc index ecf1c19463..136730cbf1 100644 --- a/src/jit/x64/emit.dasc +++ b/src/jit/x64/emit.dasc @@ -369,6 +369,22 @@ static MVMint64 fits_in_32_bit(MVMint64 number) { return (number >= INT32_MIN) && (number <= INT32_MAX); } +static void emit_fastcreate(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg, + MVMSpeshIns *ins) { /* Emits code that allocates the object described by ins via MVM_gc_allocate_nursery, fills in its st, header.size and header.owner, and stores it in the destination register; operand layout is that of sp_fastcreate (dst, size, spesh slot). */ + MVMint16 dst = ins->operands[0].reg.orig; + MVMuint16 size = ins->operands[1].lit_i16; + MVMint16 spesh_idx = ins->operands[2].lit_i16; + | mov ARG1, TC; + | mov ARG2, size; + | callp &MVM_gc_allocate_nursery; + | get_spesh_slot TMP1, spesh_idx; + | mov aword OBJECT:RV->st, TMP1; // set st from the spesh slot value (pointer-sized store) + 
| mov word OBJECT:RV->header.size, size; // set header.size (16 bit store) + | mov TMP1d, dword TC->thread_id; // load the current thread id (32 bit) + | mov dword OBJECT:RV->header.owner, TMP1d; // set header.owner to the thread id (32 bit store) + | mov aword WORK[dst], RV; // store the new object in the destination register; RV still holds it +} + /* compile per instruction, can't really do any better yet */ void MVM_jit_emit_primitive(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitGraph *jg, MVMJitPrimitive * prim) { @@ -1684,21 +1700,9 @@ void MVM_jit_emit_primitive(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJ | mov WORK[dst], TMP2; break; } - case MVM_OP_sp_fastcreate: { - MVMint16 dst = ins->operands[0].reg.orig; - MVMuint16 size = ins->operands[1].lit_i16; - MVMint16 spesh_idx = ins->operands[2].lit_i16; - | mov ARG1, TC; - | mov ARG2, size; - | callp &MVM_gc_allocate_nursery; - | get_spesh_slot TMP1, spesh_idx; - | mov aword OBJECT:RV->st, TMP1; // st is 64 bit (pointer) - | mov word OBJECT:RV->header.size, size; // object size is 16 bit - | mov TMP1d, dword TC->thread_id; // thread id is 32 bit - | mov dword OBJECT:RV->header.owner, TMP1d; // does this even work? 
- | mov aword WORK[dst], RV; // store in local register + case MVM_OP_sp_fastcreate: + emit_fastcreate(tc, compiler, jg, ins); break; - } case MVM_OP_decont: case MVM_OP_sp_decont: { MVMint16 dst = ins->operands[0].reg.orig; @@ -2172,6 +2176,34 @@ void MVM_jit_emit_primitive(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJ | call FUNCTION; break; } + case MVM_OP_sp_fastbox_i: + case MVM_OP_sp_fastbox_i_ic: { + MVMint32 use_cache = op == MVM_OP_sp_fastbox_i_ic; /* only the _ic variant carries an int cache type index */ + MVMint16 offset = ins->operands[3].lit_i16; + MVMint16 val = ins->operands[4].reg.orig; + if (use_cache) { + MVMObject **cache = tc->instance->int_const_cache->cache[ins->operands[5].lit_i16]; + MVMint16 dst = ins->operands[0].reg.orig; + | mov TMP1, WORK[val] // value to box + | cmp TMP1, 14 // values above 14 are not cached + | jg >1 + | cmp TMP1, -1 // values below -1 are not cached + | jl >1 + | inc TMP1 // cache index is value + 1 + | mov64 TMP2, (uintptr_t)cache // mov64 loads the full 64 bit address of this type's cache row + | mov TMP2, [TMP2 + TMP1 * 8] // fetch the cached object + | mov WORK[dst], TMP2 + | jmp >2 // cache hit: skip the allocation + |1: + } + emit_fastcreate(tc, compiler, jg, ins); /* allocates the object; the stores below rely on RV still holding it */ + | mov TMP1, WORK[val]; // reload the value: emit_fastcreate overwrote TMP1 + | mov qword [RV+offset], TMP1; // write the value into the new object at the given offset + if (use_cache) { + |2: + } + break; + } default: MVM_panic(1, "Can't JIT opcode <%s>", ins->info->name); }