diff --git a/src/jit/Analysis.cpp b/src/jit/Analysis.cpp index 2b968abea..1b3f40581 100644 --- a/src/jit/Analysis.cpp +++ b/src/jit/Analysis.cpp @@ -324,7 +324,7 @@ void JITCompiler::buildVariables(uint32_t requiredStackSize) DependencyGenContext dependencyCtx(dependencySize, requiredStackSize); bool updateDeps = true; - m_variableList = new VariableList(variableCount); + m_variableList = new VariableList(variableCount, requiredStackSize); nextTryBlock = m_tryBlockStart; for (uint32_t i = 0; i < requiredStackSize; i++) { @@ -359,6 +359,8 @@ void JITCompiler::buildVariables(uint32_t requiredStackSize) const ValueTypeVector& param = module()->functionType(tagType->sigIndex())->param(); Label* catchLabel = it.u.handler; + m_variableList->pushCatchUpdate(catchLabel, param.size()); + dependencyCtx.update(catchLabel->m_dependencyStart, catchLabel->id(), STACK_OFFSET(it.stackSizeToBe), param, m_variableList); } diff --git a/src/jit/Backend.cpp b/src/jit/Backend.cpp index f267253be..ce46323e8 100644 --- a/src/jit/Backend.cpp +++ b/src/jit/Backend.cpp @@ -70,6 +70,11 @@ struct JITArg { void JITArg::set(Operand* operand) { if (VARIABLE_TYPE(operand->ref) != Operand::Immediate) { + if (VARIABLE_TYPE(operand->ref) == Operand::Register) { + this->arg = VARIABLE_GET_REF(operand->ref); + this->argw = 0; + return; + } this->arg = SLJIT_MEM1(kFrameReg); this->argw = static_cast(VARIABLE_GET_OFFSET(operand->ref)); return; @@ -240,6 +245,19 @@ CompileContext* CompileContext::get(sljit_compiler* compiler) return reinterpret_cast(context); } +#define GET_TARGET_REG(arg, default_reg) \ + (SLJIT_IS_REG(arg) ? (arg) : (default_reg)) +#define GET_SOURCE_REG(arg, default_reg) \ + (SLJIT_IS_REG(arg) ? 
(arg) : (default_reg)) +#define MOVE_TO_REG(compiler, mov_op, target_reg, arg, argw) \ + if ((target_reg) != (arg)) { \ + sljit_emit_op1(compiler, mov_op, (target_reg), 0, (arg), (argw)); \ + } +#define MOVE_FROM_REG(compiler, mov_op, arg, argw, source_reg) \ + if ((source_reg) != (arg)) { \ + sljit_emit_op1(compiler, mov_op, (arg), (argw), (source_reg), 0); \ + } + static void moveIntToDest(sljit_compiler* compiler, sljit_s32 movOp, JITArg& dstArg, sljit_sw offset) { if (SLJIT_IS_REG(dstArg.arg)) { @@ -262,19 +280,103 @@ static void moveFloatToDest(sljit_compiler* compiler, sljit_s32 movOp, JITArg& d sljit_emit_fop1(compiler, movOp, dstArg.arg, dstArg.argw, SLJIT_TMP_DEST_FREG, 0); } -#define GET_TARGET_REG(arg, default_reg) \ - (SLJIT_IS_REG(arg) ? (arg) : (default_reg)) -#define GET_SOURCE_REG(arg, default_reg) \ - (SLJIT_IS_REG(arg) ? (arg) : (default_reg)) -#define MOVE_TO_REG(compiler, mov_op, target_reg, arg, argw) \ - if ((target_reg) != (arg)) { \ - sljit_emit_op1(compiler, mov_op, (target_reg), 0, (arg), (argw)); \ +static void emitInitR0R1(sljit_compiler* compiler, sljit_s32 movOp1, sljit_s32 movOp2, JITArg* params) +{ + if (params[1].arg != SLJIT_R0) { + MOVE_TO_REG(compiler, movOp1, SLJIT_R0, params[0].arg, params[0].argw); + MOVE_TO_REG(compiler, movOp2, SLJIT_R1, params[1].arg, params[1].argw); + return; } -#define MOVE_FROM_REG(compiler, mov_op, arg, argw, source_reg) \ - if ((source_reg) != (arg)) { \ - sljit_emit_op1(compiler, mov_op, (arg), (argw), (source_reg), 0); \ + + if (params[0].arg != SLJIT_R1) { + sljit_emit_op1(compiler, movOp2, SLJIT_R1, 0, SLJIT_R0, 0); + MOVE_TO_REG(compiler, movOp1, SLJIT_R0, params[0].arg, params[0].argw); + return; + } + + // Swap arguments. 
+ sljit_emit_op1(compiler, movOp2, SLJIT_TMP_DEST_REG, 0, SLJIT_R0, 0); + sljit_emit_op1(compiler, movOp1, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, movOp2, SLJIT_R1, 0, SLJIT_TMP_DEST_REG, 0); +} + +static void emitInitR0R1R2(sljit_compiler* compiler, sljit_s32 movOp, Operand* params) +{ + JITArg src[3] = { params, params + 1, params + 2 }; + int dependencies[3] = { 0 }; + + if (src[1].arg == SLJIT_R0) { + dependencies[0] = 1; + } + if (src[2].arg == SLJIT_R0) { + dependencies[0]++; + } + + if (src[0].arg == SLJIT_R1) { + dependencies[1] = 1; + } + if (src[2].arg == SLJIT_R1) { + dependencies[1]++; } + if (src[0].arg == SLJIT_R2) { + dependencies[2] = 1; + } + if (src[1].arg == SLJIT_R2) { + dependencies[2]++; + } + + for (int i = 0; i < 3; i++) { + if (dependencies[i] != 0) { + continue; + } + + sljit_emit_op1(compiler, movOp, SLJIT_R(i), 0, src[i].arg, src[i].argw); + + int other1 = i > 0 ? 0 : 1; + int other2 = i < 2 ? 2 : 1; + int sljit_r1 = SLJIT_R(other1); + int sljit_r2 = SLJIT_R(other2); + + ASSERT(i != other1 && i != other2 && other1 != other2); + + if (src[other2].arg != sljit_r1) { + MOVE_TO_REG(compiler, movOp, sljit_r1, src[other1].arg, src[other1].argw); + MOVE_TO_REG(compiler, movOp, sljit_r2, src[other2].arg, src[other2].argw); + } else if (src[other1].arg != sljit_r2) { + sljit_emit_op1(compiler, movOp, sljit_r2, 0, sljit_r1, 0); + MOVE_TO_REG(compiler, movOp, sljit_r1, src[other1].arg, src[other1].argw); + } else { + // Swap arguments. 
+ sljit_emit_op1(compiler, movOp, SLJIT_TMP_DEST_FREG, 0, sljit_r1, 0); + sljit_emit_op1(compiler, movOp, sljit_r1, 0, sljit_r2, 0); + sljit_emit_op1(compiler, movOp, sljit_r2, 0, SLJIT_TMP_DEST_FREG, 0); + } + + return; + } + + sljit_emit_op1(compiler, movOp, SLJIT_TMP_DEST_FREG, 0, SLJIT_R0, 0); + + if (src[0].arg == SLJIT_R1) { + ASSERT(src[1].arg == SLJIT_R2); + + sljit_emit_op1(compiler, movOp, SLJIT_R0, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, movOp, SLJIT_R1, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, movOp, SLJIT_R2, 0, SLJIT_TMP_DEST_FREG, 0); + return; + } + + ASSERT(src[2].arg == SLJIT_R1); + sljit_emit_op1(compiler, movOp, SLJIT_R0, 0, SLJIT_R2, 0); + sljit_emit_op1(compiler, movOp, SLJIT_R2, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, movOp, SLJIT_R1, 0, SLJIT_TMP_DEST_FREG, 0); +} + +static void emitSelect128(sljit_compiler*, Instruction*, sljit_s32); +static void emitStoreImmediate(sljit_compiler* compiler, Operand* to, Instruction* instr, bool isFloat); +static void emitMove(sljit_compiler*, uint32_t type, Operand* from, Operand* to); + #if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #define HAS_SIMD @@ -315,8 +417,6 @@ static void simdOperandToArg(sljit_compiler* compiler, Operand* operand, JITArg& #include "FloatMathInl.h" -static void emitSelect128(sljit_compiler*, Instruction*, sljit_s32); - #if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) #include "IntMath32Inl.h" #else /* !SLJIT_32BIT_ARCHITECTURE */ @@ -437,6 +537,146 @@ void SlowCase::emit(sljit_compiler* compiler) } } +static void emitStoreImmediate(sljit_compiler* compiler, Operand* to, Instruction* instr, bool isFloat) +{ + if (VARIABLE_TYPE(to->ref) == Operand::Offset) { + sljit_sw offset = VARIABLE_GET_OFFSET(to->ref); + + switch (instr->opcode()) { +#ifdef HAS_SIMD + case ByteCode::Const128Opcode: { + const uint8_t* value = reinterpret_cast(instr->byteCode())->value(); + + sljit_emit_simd_mov(compiler, 
SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, SLJIT_TMP_DEST_FREG, SLJIT_MEM0(), (sljit_sw)value); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, SLJIT_TMP_DEST_FREG, SLJIT_MEM1(kFrameReg), offset); + return; + } +#endif /* HAS_SIMD */ + case ByteCode::Const32Opcode: { + uint32_t value32 = reinterpret_cast(instr->byteCode())->value(); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(kFrameReg), offset, SLJIT_IMM, static_cast(value32)); + return; + } + default: { + ASSERT(instr->opcode() == ByteCode::Const64Opcode); + + uint64_t value64 = reinterpret_cast(instr->byteCode())->value(); +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset + WORD_LOW_OFFSET, SLJIT_IMM, static_cast(value64)); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset + WORD_HIGH_OFFSET, SLJIT_IMM, static_cast(value64 >> 32)); +#else /* !SLJIT_32BIT_ARCHITECTURE */ + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset, SLJIT_IMM, static_cast(value64)); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + return; + } + } + } + + sljit_s32 reg = static_cast(VARIABLE_GET_REF(to->ref)); + + switch (instr->opcode()) { +#ifdef HAS_SIMD + case ByteCode::Const128Opcode: { + const uint8_t* value = reinterpret_cast(instr->byteCode())->value(); + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, reg, SLJIT_MEM0(), (sljit_sw)value); + return; + } +#endif /* HAS_SIMD */ + case ByteCode::Const32Opcode: { + uint32_t value32 = reinterpret_cast(instr->byteCode())->value(); + + if (isFloat) { + union { + uint32_t valueI32; + sljit_f32 valueF32; + } u; + + u.valueI32 = value32; + sljit_emit_fset32(compiler, reg, u.valueF32); + } else { + sljit_emit_op1(compiler, SLJIT_MOV32, reg, 0, SLJIT_IMM, static_cast(value32)); + } + return; + } + default: { + ASSERT(instr->opcode() == ByteCode::Const64Opcode); + + uint64_t 
value64 = reinterpret_cast(instr->byteCode())->value(); + + if (isFloat) { + union { + uint64_t valueI64; + sljit_f64 valueF64; + } u; + + u.valueI64 = value64; + sljit_emit_fset64(compiler, reg, u.valueF64); + } else { +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + sljit_emit_op1(compiler, SLJIT_MOV, reg & 0xff, 0, SLJIT_IMM, static_cast(value64)); + sljit_emit_op1(compiler, SLJIT_MOV, reg >> 8, 0, SLJIT_IMM, static_cast(value64 >> 32)); +#else /* !SLJIT_32BIT_ARCHITECTURE */ + sljit_emit_op1(compiler, SLJIT_MOV, reg, 0, SLJIT_IMM, static_cast(value64)); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } + return; + } + } +} + +static void emitMove(sljit_compiler* compiler, uint32_t type, Operand* from, Operand* to) +{ + ASSERT(VARIABLE_TYPE(from->ref) != Operand::Immediate && VARIABLE_TYPE(to->ref) != Operand::Immediate); + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (type == Instruction::Int64Operand) { + JITArgPair src(from); + JITArgPair dst(to); + + sljit_emit_op1(compiler, SLJIT_MOV, dst.arg1, dst.arg1w, src.arg1, src.arg1w); + sljit_emit_op1(compiler, SLJIT_MOV, dst.arg2, dst.arg2w, src.arg2, src.arg2w); + return; + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + JITArg src(from); + JITArg dst(to); + + switch (type) { + case Instruction::Int32Operand: + sljit_emit_op1(compiler, SLJIT_MOV32, dst.arg, dst.argw, src.arg, src.argw); + return; +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + case Instruction::Int64Operand: + sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, src.arg, src.argw); + return; +#endif /* SLJIT_64BIT_ARCHITECTURE */ + case Instruction::Float32Operand: + sljit_emit_fop1(compiler, SLJIT_MOV_F32, dst.arg, dst.argw, src.arg, src.argw); + return; + case Instruction::Float64Operand: + sljit_emit_fop1(compiler, SLJIT_MOV_F64, dst.arg, dst.argw, src.arg, src.argw); + return; + default: + break; + } + + ASSERT(type == Instruction::V128Operand); + + if (!SLJIT_IS_MEM(src.arg)) { + 
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128, src.arg, dst.arg, dst.argw); + return; + } + + sljit_s32 dstReg = GET_TARGET_REG(dst.arg, SLJIT_TMP_DEST_FREG); + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128, dstReg, src.arg, src.argw); + + if (dstReg == SLJIT_TMP_DEST_FREG) { + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_FREG, dst.arg, dst.argw); + } +} + static void emitImmediate(sljit_compiler* compiler, Instruction* instr) { Operand* result = instr->operands(); @@ -447,7 +687,7 @@ static void emitImmediate(sljit_compiler* compiler, Instruction* instr) return; } - emitStoreImmediate(compiler, static_cast(VARIABLE_GET_OFFSET(result->ref)), instr); + emitStoreImmediate(compiler, result, instr, (instr->info() & Instruction::kHasFloatOperand) != 0); } static void emitEnd(sljit_compiler* compiler, Instruction* instr) @@ -455,20 +695,26 @@ static void emitEnd(sljit_compiler* compiler, Instruction* instr) End* end = reinterpret_cast(instr->byteCode()); Operand* param = instr->params(); - Operand* paramEnd = param + instr->paramCount(); - ByteCodeStackOffset* offsets = end->resultOffsets(); + ByteCodeStackOffset* stackOffset = end->resultOffsets(); CompileContext* context = CompileContext::get(compiler); - const ValueTypeVector& result = context->compiler->moduleFunction()->functionType()->result(); - size_t idx = 0; + FunctionType* functionType = context->compiler->moduleFunction()->functionType(); + + for (auto it : functionType->result()) { + Operand dst; + dst.ref = VARIABLE_SET(STACK_OFFSET(*stackOffset), Operand::Offset); - while (param < paramEnd) { - if (VARIABLE_TYPE(param->ref) == Operand::Immediate && !(VARIABLE_GET_IMM(param->ref)->info() & Instruction::kKeepInstruction)) { - emitStoreImmediate(compiler, *offsets, VARIABLE_GET_IMM(param->ref)); + switch (VARIABLE_TYPE(param->ref)) { + case Operand::Immediate: + ASSERT(!(VARIABLE_GET_IMM(param->ref)->info() & 
Instruction::kKeepInstruction)); + emitStoreImmediate(compiler, &dst, VARIABLE_GET_IMM(param->ref), false); + break; + case Operand::Register: + emitMove(compiler, Instruction::valueTypeToOperandType(it), param, &dst); + break; } - offsets += (valueSize(result[idx]) + (sizeof(size_t) - 1)) / sizeof(size_t); + stackOffset += (valueSize(it) + (sizeof(size_t) - 1)) / sizeof(size_t); param++; - idx++; } sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_IMM, reinterpret_cast(end->resultOffsets())); @@ -506,22 +752,22 @@ static void emitBrTable(sljit_compiler* compiler, BrTableInstruction* instr) Label** end = label + targetLabelCount; JITArg src(instr->operands()); - sljit_s32 offsetReg = GET_SOURCE_REG(src.arg, SLJIT_R0); + sljit_s32 offsetReg = GET_SOURCE_REG(src.arg, SLJIT_TMP_DEST_REG); MOVE_TO_REG(compiler, SLJIT_MOV32, offsetReg, src.arg, src.argw); if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { sljit_emit_op2u(compiler, SLJIT_SUB32 | SLJIT_SET_GREATER_EQUAL, offsetReg, 0, SLJIT_IMM, static_cast(targetLabelCount)); - sljit_emit_select(compiler, SLJIT_GREATER_EQUAL | SLJIT_32, SLJIT_R0, SLJIT_IMM, static_cast(targetLabelCount), offsetReg); + sljit_emit_select(compiler, SLJIT_GREATER_EQUAL | SLJIT_32, SLJIT_TMP_DEST_REG, SLJIT_IMM, static_cast(targetLabelCount), offsetReg); - offsetReg = SLJIT_R0; + offsetReg = SLJIT_TMP_DEST_REG; end++; } else { sljit_jump* jump = sljit_emit_cmp(compiler, SLJIT_GREATER_EQUAL | SLJIT_32, offsetReg, 0, SLJIT_IMM, static_cast(targetLabelCount)); (*end)->jumpFrom(jump); } - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, static_cast(context->branchTableOffset)); - sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM2(SLJIT_R1, offsetReg), SLJIT_WORD_SHIFT); + sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_TMP_MEM_REG, 0, offsetReg, 0, SLJIT_IMM, SLJIT_WORD_SHIFT); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(SLJIT_TMP_MEM_REG), static_cast(context->branchTableOffset)); sljit_uw* target = 
reinterpret_cast(context->branchTableOffset); @@ -551,7 +797,7 @@ static void emitGlobalGet32(sljit_compiler* compiler, Instruction* instr) sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_MEM1(kContextReg), OffsetOfContextField(instance)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_MEM1(SLJIT_TMP_MEM_REG), context->globalsStart + globalGet->index() * sizeof(void*)); - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { moveFloatToDest(compiler, SLJIT_MOV_F32, dstArg, JITFieldAccessor::globalValueOffset()); } else { moveIntToDest(compiler, SLJIT_MOV32, dstArg, JITFieldAccessor::globalValueOffset()); @@ -565,7 +811,7 @@ static void emitGlobalSet32(sljit_compiler* compiler, Instruction* instr) JITArg src; sljit_s32 baseReg; - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { floatOperandToArg(compiler, instr->operands(), src, SLJIT_TMP_DEST_FREG); baseReg = SLJIT_TMP_MEM_REG; } else { @@ -576,7 +822,7 @@ static void emitGlobalSet32(sljit_compiler* compiler, Instruction* instr) sljit_emit_op1(compiler, SLJIT_MOV, baseReg, 0, SLJIT_MEM1(kContextReg), OffsetOfContextField(instance)); if (SLJIT_IS_MEM(src.arg)) { - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_TMP_DEST_FREG, 0, src.arg, src.argw); src.arg = SLJIT_TMP_DEST_FREG; } else { @@ -588,7 +834,7 @@ static void emitGlobalSet32(sljit_compiler* compiler, Instruction* instr) sljit_emit_op1(compiler, SLJIT_MOV, baseReg, 0, SLJIT_MEM1(baseReg), context->globalsStart + globalSet->index() * sizeof(void*)); - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(baseReg), JITFieldAccessor::globalValueOffset(), src.arg, src.argw); } else { 
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(baseReg), JITFieldAccessor::globalValueOffset(), src.arg, src.argw); @@ -605,6 +851,35 @@ static void emitRefFunc(sljit_compiler* compiler, Instruction* instr) moveIntToDest(compiler, SLJIT_MOV_P, dstArg, context->functionsStart + (sizeof(Function*) * (reinterpret_cast(instr->byteCode()))->funcIndex())); } +static void emitStackInit(sljit_compiler* compiler, Instruction* instr) +{ + uint32_t type; + + switch (instr->opcode()) { + case ByteCode::MoveI32Opcode: + type = Instruction::Int32Operand; + break; + case ByteCode::MoveI64Opcode: + type = Instruction::Int64Operand; + break; + case ByteCode::MoveF32Opcode: + type = Instruction::Float32Operand; + break; + case ByteCode::MoveF64Opcode: + type = Instruction::Float64Operand; + break; + default: + ASSERT(instr->opcode() == ByteCode::MoveV128Opcode); + type = Instruction::V128Operand; + break; + } + + Operand src; + src.ref = instr->asExtended()->value().offset; + + emitMove(compiler, type, &src, instr->operands()); +} + JITModule::~JITModule() { delete m_instanceConstData; @@ -851,6 +1126,10 @@ void JITCompiler::compileFunction(JITFunction* jitFunc, bool isExternal) break; } #endif /* HAS_SIMD */ + case Instruction::StackInit: { + emitStackInit(m_compiler, item->asInstruction()); + break; + } default: { switch (item->asInstruction()->opcode()) { case ByteCode::SelectOpcode: { @@ -1014,15 +1293,14 @@ void JITCompiler::clear() void JITCompiler::emitProlog() { FunctionList& func = m_functionList.back(); - sljit_s32 savedRegCount = 4; if (func.isExported) { func.exportEntryLabel = sljit_emit_label(m_compiler); } sljit_emit_enter(m_compiler, SLJIT_ENTER_REG_ARG | SLJIT_ENTER_KEEP(2), SLJIT_ARGS0(P), - SLJIT_NUMBER_OF_SCRATCH_REGISTERS, savedRegCount, - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, sizeof(ExecutionContext::CallFrame)); + SLJIT_NUMBER_OF_SCRATCH_REGISTERS, m_savedIntegerRegCount + 2, + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, m_savedFloatRegCount, 
sizeof(ExecutionContext::CallFrame)); // Setup new frame. sljit_emit_op1(m_compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_MEM1(kContextReg), OffsetOfContextField(lastFrame)); diff --git a/src/jit/ByteCodeParser.cpp b/src/jit/ByteCodeParser.cpp index 5ad5dc334..f2ccba903 100644 --- a/src/jit/ByteCodeParser.cpp +++ b/src/jit/ByteCodeParser.cpp @@ -151,16 +151,15 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL3(OTOp2F32, /* SSD */ F32, F32, F32 | S0 | S1) \ OL3(OTOp2F64, /* SSD */ F64, F64, F64 | S0 | S1) \ OL1(OTGetI32, /* S */ I32) \ - OL1(OTPutI32, /* D */ I32 | TMP) \ + OL1(OTPutI32, /* D */ I32) \ OL1(OTPutI64, /* D */ I64) \ OL1(OTPutV128, /* D */ V128) \ OL1(OTPutPTR, /* D */ PTR) \ - OL2(OTMoveI32, /* SD */ I32, I32 | S0) \ OL2(OTMoveF32, /* SD */ F32 | NOTMP, F32 | S0) \ - OL2(OTMoveI64, /* SD */ I64, I64 | S0) \ OL2(OTMoveF64, /* SD */ F64 | NOTMP, F64 | S0) \ OL2(OTMoveV128, /* SD */ V128, V128 | S0) \ - OL3(OTCompareI64, /* SSD */ I64, I64, I32 | S0 | S1) \ + OL2(OTEqzI64, /* SD */ I64, I32) \ + OL3(OTCompareI64, /* SSD */ I64, I64, I32) \ OL3(OTCompareF32, /* SSD */ F32, F32, I32) \ OL3(OTCompareF64, /* SSD */ F64, F64, I32) \ OL3(OTCopySignF32, /* SSD */ F32, F32, F32 | TMP | S0 | S1) \ @@ -168,14 +167,13 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL2(OTDemoteF64, /* SD */ F64, F32 | S0) \ OL2(OTPromoteF32, /* SD */ F32, F64 | S0) \ OL4(OTLoadI32, /* SDTT */ I32, I32 | S0, PTR, I32 | S0) \ - OL4(OTLoadI64, /* SDTT */ I32, I64 | S0, PTR, I32 | S0) \ OL4(OTLoadF32, /* SDTT */ I32, F32, PTR, I32 | S0) \ OL4(OTLoadF64, /* SDTT */ I32, F64, PTR, I32 | S0) \ OL4(OTLoadV128, /* SDTT */ I32, V128 | TMP, PTR, I32 | S0) \ OL5(OTLoadLaneV128, /* SSDTTT */ I32, V128 | NOTMP, V128 | TMP | S1, PTR, I32 | S0) \ OL5(OTStoreI32, /* SSTTT */ I32, I32, PTR, I32 | S0, I32 | S1) \ - OL5(OTStoreI64, /* SSTTT */ I32, I64, PTR, I32 | S0, PTR | S1) \ OL4(OTStoreF32, /* SSTT */ I32, F32 | NOTMP, PTR, I32 | S0) \ + OL5(OTStoreI64, /* SSTTT 
*/ I32, I64, PTR, I32 | S0, PTR | S1) \ OL4(OTStoreF64, /* SSTT */ I32, F64 | NOTMP, PTR, I32 | S0) \ OL4(OTStoreV128, /* SSTT */ I32, V128 | TMP, PTR, I32 | S0) \ OL3(OTCallback3Arg, /* SSS */ I32, I32, I32) \ @@ -188,8 +186,8 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL2(OTGlobalSetI64, /* ST */ I64, PTR) \ OL1(OTGlobalSetF32, /* S */ F32 | NOTMP) \ OL1(OTGlobalSetF64, /* S */ F64 | NOTMP) \ - OL2(OTConvertInt32FromInt64, /* SD */ I64, I32 | S0) \ - OL2(OTConvertInt64FromInt32, /* SD */ I32, I64 | S0) \ + OL2(OTConvertInt32FromInt64, /* SD */ I64, I32) \ + OL2(OTConvertInt64FromInt32, /* SD */ I32, I64) \ OL2(OTConvertInt32FromFloat32, /* SD */ F32 | TMP, I32 | TMP) \ OL2(OTConvertInt32FromFloat64, /* SD */ F64 | TMP, I32 | TMP) \ OL2(OTConvertInt64FromFloat32Callback, /* SD */ F32, I64) \ @@ -207,9 +205,10 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) #define OPERAND_TYPE_LIST_MATH \ OL3(OTOp2I64, /* SSD */ I64, I64, I64 | TMP | S0 | S1) \ OL3(OTShiftI64, /* SSD */ I64, I64 | LOW, I64 | TMP | S0) \ - OL4(OTMulI64, /* SSDT */ I64, I64, I64 | TMP | S0, I32 | S1) \ + OL3(OTMulI64, /* SSDT */ I64, I64, I64 | S0 | S1) \ OL3(OTDivRemI64, /* SSD */ I64, I64, I64 | S0 | S1) \ OL2(OTCountZeroesI64, /* SD */ I64, I64 | TMP | S0) \ + OL4(OTLoadI64, /* SDTT */ I32, I64, PTR, I32 | S0) \ OL5(OTStoreI64Low, /* SSTTT */ I32, I64 | LOW, PTR, I32 | S0, PTR | S1) \ OL1(OTGlobalGetI64, /* D */ I64_LOW) \ OL2(OTConvertInt32FromFloat32Callback, /* SD */ F32, I32) \ @@ -220,6 +219,7 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) #define OPERAND_TYPE_LIST_MATH \ OL3(OTOp2I64, /* SSD */ I64, I64, I64 | S0 | S1) \ + OL4(OTLoadI64, /* SDTT */ I32, I64 | S0, PTR, I32 | S0) \ OL1(OTGlobalGetI64, /* D */ I64) \ OL2(OTConvertInt64FromFloat32, /* SD */ F32 | TMP, I64 | TMP) \ OL2(OTConvertInt64FromFloat64, /* SD */ F64 | TMP, I64 | TMP) \ @@ -565,7 +565,7 @@ static void compileFunction(JITCompiler* compiler) case 
ByteCode::I64GeUOpcode: { group = Instruction::Compare; paramCount = 2; - info = Instruction::kIsMergeCompare; + info = Instruction::kIsMergeCompare | Instruction::kFreeUnusedEarly; requiredInit = OTCompareI64; break; } @@ -723,13 +723,14 @@ static void compileFunction(JITCompiler* compiler) case ByteCode::I64EqzOpcode: { group = Instruction::Compare; paramCount = 1; - info = Instruction::kIsMergeCompare; - requiredInit = OTOp1I64; + info = Instruction::kIsMergeCompare | Instruction::kFreeUnusedEarly; + requiredInit = OTEqzI64; break; } case ByteCode::I32WrapI64Opcode: { group = Instruction::Convert; paramCount = 1; + info = Instruction::kFreeUnusedEarly; requiredInit = OTConvertInt32FromInt64; break; } @@ -737,6 +738,7 @@ static void compileFunction(JITCompiler* compiler) case ByteCode::I64ExtendI32UOpcode: { group = Instruction::Convert; paramCount = 1; + info = Instruction::kFreeUnusedEarly; requiredInit = OTConvertInt64FromInt32; break; } @@ -934,6 +936,7 @@ static void compileFunction(JITCompiler* compiler) Instruction* instr = compiler->appendExtended(byteCode, Instruction::Call, opcode, functionType->param().size() + callerCount, functionType->result().size()); Operand* operand = instr->operands(); + instr->addInfo(Instruction::kIsCallback | Instruction::kFreeUnusedEarly); for (auto it : functionType->param()) { operand->ref = STACK_OFFSET(*stackOffset); @@ -1460,13 +1463,13 @@ static void compileFunction(JITCompiler* compiler) switch (opcode) { case ByteCode::MoveI32Opcode: - requiredInit = OTMoveI32; + requiredInit = OTOp1I32; break; case ByteCode::MoveF32Opcode: requiredInit = OTMoveF32; break; case ByteCode::MoveI64Opcode: - requiredInit = OTMoveI64; + requiredInit = OTOp1I64; break; case ByteCode::MoveF64Opcode: requiredInit = OTMoveF64; @@ -1493,7 +1496,6 @@ static void compileFunction(JITCompiler* compiler) Operand* operands = instr->operands(); if (isFloatGlobal(globalGet32->index(), compiler->module())) { - 
instr->addInfo(Instruction::kIsGlobalFloatBit); instr->setRequiredRegsDescriptor(OTGlobalGetF32); } @@ -1508,7 +1510,6 @@ static void compileFunction(JITCompiler* compiler) Operand* operands = instr->operands(); if (isFloatGlobal(globalGet64->index(), compiler->module())) { - instr->addInfo(Instruction::kIsGlobalFloatBit); instr->setRequiredRegsDescriptor(OTGlobalGetF64); } @@ -1533,7 +1534,6 @@ static void compileFunction(JITCompiler* compiler) Operand* operands = instr->operands(); if (isFloatGlobal(globalSet32->index(), compiler->module())) { - instr->addInfo(Instruction::kIsGlobalFloatBit); instr->setRequiredRegsDescriptor(OTGlobalSetF32); } @@ -1548,7 +1548,6 @@ static void compileFunction(JITCompiler* compiler) Operand* operands = instr->operands(); if (isFloatGlobal(globalSet64->index(), compiler->module())) { - instr->addInfo(Instruction::kIsGlobalFloatBit); instr->setRequiredRegsDescriptor(OTGlobalSetF64); } @@ -1899,12 +1898,13 @@ static void compileFunction(JITCompiler* compiler) } compiler->buildVariables(STACK_OFFSET(function->requiredStackSize())); + compiler->allocateRegisters(); if (compiler->verboseLevel() >= 1) { compiler->dump(); } - compiler->allocateRegisters(); + compiler->freeVariables(); Walrus::JITFunction* jitFunc = new JITFunction(); @@ -1912,6 +1912,39 @@ static void compileFunction(JITCompiler* compiler) compiler->compileFunction(jitFunc, true); } +const uint8_t* VariableList::getOperandDescriptor(Instruction* instr) +{ + uint32_t requiredInit = OTNone; + + switch (instr->opcode()) { + case ByteCode::Load32Opcode: + requiredInit = OTLoadF32; + break; + case ByteCode::Load64Opcode: + requiredInit = OTLoadF64; + break; + case ByteCode::Store32Opcode: + requiredInit = OTStoreF32; + break; + case ByteCode::Store64Opcode: + requiredInit = OTStoreF64; + break; + default: + break; + } + + if (requiredInit != OTNone) { + ASSERT((instr->paramCount() + instr->resultCount()) == 2); + VariableList::Variable& variable = 
variables[instr->getParam(1)->ref]; + + if (variable.info & Instruction::FloatOperandMarker) { + return Instruction::getOperandDescriptorByOffset(requiredInit); + } + } + + return instr->getOperandDescriptor(); +} + void Module::jitCompile(ModuleFunction** functions, size_t functionsLength, int verboseLevel) { JITCompiler compiler(this, verboseLevel); diff --git a/src/jit/CallInl.h b/src/jit/CallInl.h index 78dd2e813..f71fe0966 100644 --- a/src/jit/CallInl.h +++ b/src/jit/CallInl.h @@ -95,8 +95,17 @@ static void emitCall(sljit_compiler* compiler, Instruction* instr) Operand* operand = instr->operands(); for (auto it : functionType->param()) { - if (VARIABLE_TYPE(operand->ref) == Operand::Immediate && !(VARIABLE_GET_IMM(operand->ref)->info() & Instruction::kKeepInstruction)) { - emitStoreImmediate(compiler, *stackOffset, VARIABLE_GET_IMM(operand->ref)); + Operand dst; + dst.ref = VARIABLE_SET(STACK_OFFSET(*stackOffset), Operand::Offset); + + switch (VARIABLE_TYPE(operand->ref)) { + case Operand::Immediate: + ASSERT(!(VARIABLE_GET_IMM(operand->ref)->info() & Instruction::kKeepInstruction)); + emitStoreImmediate(compiler, &dst, VARIABLE_GET_IMM(operand->ref), false); + break; + case Operand::Register: + emitMove(compiler, Instruction::valueTypeToOperandType(it), operand, &dst); + break; } operand++; @@ -104,10 +113,22 @@ static void emitCall(sljit_compiler* compiler, Instruction* instr) } if (instr->opcode() == ByteCode::CallIndirectOpcode) { - if (VARIABLE_TYPE(operand->ref) == Operand::Immediate && !(VARIABLE_GET_IMM(operand->ref)->info() & Instruction::kKeepInstruction)) { - CallIndirect* callIndirect = reinterpret_cast(instr->byteCode()); - emitStoreImmediate(compiler, callIndirect->calleeOffset(), VARIABLE_GET_IMM(operand->ref)); + CallIndirect* callIndirect = reinterpret_cast(instr->byteCode()); + + switch (VARIABLE_TYPE(operand->ref)) { + case Operand::Immediate: { + ASSERT(!(VARIABLE_GET_IMM(operand->ref)->info() & Instruction::kKeepInstruction)); + Const32* 
value = reinterpret_cast(VARIABLE_GET_IMM(operand->ref)->byteCode()); + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(kFrameReg), callIndirect->calleeOffset(), SLJIT_IMM, static_cast(value->value())); + break; + } + case Operand::Register: { + sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(kFrameReg), callIndirect->calleeOffset(), static_cast(VARIABLE_GET_REF(operand->ref)), 0); + break; + } } + + operand++; } sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_R0, 0, SLJIT_IMM, reinterpret_cast(instr->byteCode())); @@ -118,6 +139,20 @@ static void emitCall(sljit_compiler* compiler, Instruction* instr) sljit_jump* jump = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, ExecutionContext::NoError); + for (auto it : functionType->result()) { + ASSERT(VARIABLE_TYPE(operand->ref) != Operand::Immediate); + + if (VARIABLE_TYPE(operand->ref) == Operand::Register) { + Operand src; + + src.ref = VARIABLE_SET(STACK_OFFSET(*stackOffset), Operand::Offset); + emitMove(compiler, Instruction::valueTypeToOperandType(it), &src, operand); + } + + operand++; + stackOffset += (valueSize(it) + (sizeof(size_t) - 1)) / sizeof(size_t); + } + if (context->currentTryBlock == InstanceConstData::globalTryBlock) { context->appendTrapJump(ExecutionContext::ReturnToLabel, jump); return; diff --git a/src/jit/Compiler.h b/src/jit/Compiler.h index 6a9358123..f812b8c6e 100644 --- a/src/jit/Compiler.h +++ b/src/jit/Compiler.h @@ -103,6 +103,8 @@ class InstructionListItem { BitMaskSIMD, // Shift SIMD opcodes (e.g. I8X16SHL) ShiftSIMD, + // Special type for initializing values from the stack + StackInit, }; virtual ~InstructionListItem() {} @@ -192,7 +194,7 @@ class Instruction : public InstructionListItem { // Int64Operand | Float64Operand == Float64Operand Float64Operand = 6, // Helper constants for managing type info. - FloatOperandStart = V128Operand, + FloatOperandMarker = V128Operand, TypeMask = 0x7, // A temporary register must be allocated for the source or destination // operand. 
In case of source operands, the register is not modified. @@ -213,13 +215,14 @@ class Instruction : public InstructionListItem { // Various info bits. Depends on type. static const uint16_t kIs32Bit = 1 << 0; - static const uint16_t kIsGlobalFloatBit = kIs32Bit; static const uint16_t kIsCallback = 1 << 1; static const uint16_t kDestroysR0R1 = 1 << 2; - static const uint16_t kIsShift = 1 << 3; - static const uint16_t kIsMergeCompare = 1 << 4; - static const uint16_t kKeepInstruction = 1 << 5; - static const uint16_t kEarlyReturn = 1 << 6; + static const uint16_t kHasFloatOperand = 1 << 3; + static const uint16_t kIsShift = 1 << 4; + static const uint16_t kIsMergeCompare = 1 << 5; + static const uint16_t kFreeUnusedEarly = 1 << 6; + static const uint16_t kKeepInstruction = 1 << 7; + static const uint16_t kEarlyReturn = kKeepInstruction; ByteCode::Opcode opcode() { return m_opcode; } @@ -245,7 +248,7 @@ class Instruction : public InstructionListItem { ExtendedInstruction* asExtended() { - ASSERT(group() == Instruction::DirectBranch); + ASSERT(group() == Instruction::DirectBranch || group() == Instruction::StackInit); return reinterpret_cast(this); } @@ -277,6 +280,11 @@ class Instruction : public InstructionListItem { return m_operandDescriptors + u.m_requiredRegsDescriptor; } + static inline const uint8_t* getOperandDescriptorByOffset(uint32_t offset) + { + return m_operandDescriptors + offset; + } + static uint32_t valueTypeToOperandType(Value::Type type); protected: @@ -318,6 +326,8 @@ union InstructionValue { Label* targetLabel; // For calls. uint32_t resultCount; + // For StackInit group. 
+ VariableRef offset; }; class ExtendedInstruction : public Instruction { @@ -330,7 +340,7 @@ class ExtendedInstruction : public Instruction { explicit ExtendedInstruction(ByteCode* byteCode, Group group, ByteCode::Opcode opcode, uint32_t paramCount, Operand* operands) : Instruction(byteCode, group, opcode, paramCount, operands) { - ASSERT(group == Instruction::DirectBranch || group == Instruction::Call); + ASSERT(group == Instruction::DirectBranch || group == Instruction::Call || group == Instruction::StackInit); } private: @@ -566,18 +576,14 @@ struct VariableList { static const uint32_t kConstraints = kIsCallback | kDestroysR0R1; static const size_t kRangeMax = ~(VariableRef)0; - - VariableList(size_t variableCount) - { - variables.reserve(variableCount); - } + static const uint8_t kUnusedReg = 0xff; struct Variable { Variable(VariableRef value, uint32_t typeInfo, size_t id) : value(value) , info(typeInfo) - , reg1(0) - , reg2(0) + , reg1(kUnusedReg) + , reg2(kUnusedReg) , rangeEnd(id) { u.rangeStart = id; @@ -586,8 +592,8 @@ struct VariableList { Variable(VariableRef value, uint32_t typeInfo, Instruction* instr) : value(value) , info(typeInfo) - , reg1(0) - , reg2(0) + , reg1(kUnusedReg) + , reg2(kUnusedReg) , rangeEnd(instr->id()) { if (instr->group() == Instruction::Immediate) { @@ -610,6 +616,25 @@ struct VariableList { size_t rangeEnd; }; + struct CatchUpdate { + CatchUpdate(Label* handler, size_t variableListStart, size_t variableListSize) + : handler(handler) + , variableListStart(variableListStart) + , variableListSize(variableListSize) + { + } + + Label* handler; + size_t variableListStart; + size_t variableListSize; + }; + + VariableList(size_t variableCount, size_t paramCount) + : paramCount(paramCount) + { + variables.reserve(variableCount); + } + VariableRef getMergeHeadSlowCase(VariableRef ref); VariableRef getMergeHead(VariableRef ref) @@ -621,7 +646,16 @@ struct VariableList { return getMergeHeadSlowCase(ref); } + void pushCatchUpdate(Label* 
handler, size_t variableListSize) + { + catchUpdates.push_back(CatchUpdate(handler, variables.size(), variableListSize)); + } + + const uint8_t* getOperandDescriptor(Instruction* instr); + + size_t paramCount; std::vector variables; + std::vector catchUpdates; }; class JITCompiler { @@ -649,6 +683,8 @@ class JITCompiler { ExtendedInstruction* appendExtended(ByteCode* byteCode, Instruction::Group group, ByteCode::Opcode opcode, uint32_t paramCount, uint32_t resultCount); Instruction* appendBranch(ByteCode* byteCode, ByteCode::Opcode opcode, Label* label, uint32_t offset); BrTableInstruction* appendBrTable(ByteCode* byteCode, uint32_t numTargets, uint32_t offset); + InstructionListItem* insertStackInit(InstructionListItem* prev, VariableList::Variable& variable, VariableRef ref); + void insertStackInitList(InstructionListItem* prev, size_t variableListStart, size_t variableListSize); void appendLabel(Label* label) { @@ -668,6 +704,7 @@ class JITCompiler { void dump(); void buildVariables(uint32_t requiredStackSize); void allocateRegisters(); + void freeVariables(); void compileFunction(JITFunction* jitFunc, bool isExternal); void generateCode(); @@ -713,6 +750,8 @@ class JITCompiler { size_t m_tryBlockOffset; int m_verboseLevel; uint32_t m_options; + uint8_t m_savedIntegerRegCount; + uint8_t m_savedFloatRegCount; std::vector m_tryBlocks; std::vector m_functionList; diff --git a/src/jit/InstList.cpp b/src/jit/InstList.cpp index e3fe14aa1..2c540192a 100644 --- a/src/jit/InstList.cpp +++ b/src/jit/InstList.cpp @@ -253,6 +253,67 @@ BrTableInstruction* JITCompiler::appendBrTable(ByteCode* byteCode, uint32_t numT return branch; } +InstructionListItem* JITCompiler::insertStackInit(InstructionListItem* prev, VariableList::Variable& variable, VariableRef ref) +{ + uint32_t type = variable.info & Instruction::TypeMask; + ByteCode::Opcode opcode; + + switch (type) { + case Instruction::Int32Operand: + opcode = ByteCode::MoveI32Opcode; + break; + case Instruction::Int64Operand: 
+ opcode = ByteCode::MoveI64Opcode; + break; + case Instruction::Float32Operand: + opcode = ByteCode::MoveF32Opcode; + break; + case Instruction::Float64Operand: + opcode = ByteCode::MoveF64Opcode; + break; + default: + ASSERT(type == Instruction::V128Operand); + opcode = ByteCode::MoveV128Opcode; + break; + } + + ASSERT(!(variable.info & (VariableList::kIsMerged | VariableList::kIsImmediate))); + + ExtendedInstruction* instr = new SimpleExtendedInstruction<1>(nullptr, Instruction::StackInit, opcode, 0); + instr->m_resultCount = 1; + instr->value().offset = variable.value; + instr->operands()->ref = ref; + + if (m_last == prev) { + m_last = instr; + } + + if (prev == nullptr) { + instr->m_next = m_first; + m_first = instr; + } else { + instr->m_next = prev->m_next; + prev->m_next = instr; + } + + return instr; +} + +void JITCompiler::insertStackInitList(InstructionListItem* prev, size_t variableListStart, size_t variableListSize) +{ + size_t end = variableListStart + variableListSize; + + for (size_t i = variableListStart; i < end; i++) { + VariableRef ref = m_variableList->getMergeHead(i); + + VariableList::Variable& variable = m_variableList->variables[ref]; + + if (variable.reg1 != VariableList::kUnusedReg) { + prev = insertStackInit(prev, variable, i); + } + } +} + void JITCompiler::dump() { bool enableColors = (verboseLevel() >= 2); @@ -302,6 +363,10 @@ void JITCompiler::dump() } break; } + case Instruction::StackInit: { + printf(" Offset:%d\n", static_cast(VARIABLE_GET_OFFSET(instr->asExtended()->value().offset))); + break; + } default: { break; } @@ -351,6 +416,34 @@ void JITCompiler::dump() if (VARIABLE_TYPE(variable.value) == Operand::Offset) { printf(" (O:%d) [%d-%d]", static_cast(VARIABLE_GET_OFFSET(variable.value)), static_cast(variable.u.rangeStart), static_cast(variable.rangeEnd)); + } else if (VARIABLE_TYPE(variable.value) == Operand::Register) { + const char* prefix = ""; + uint32_t savedStart = SLJIT_R(SLJIT_NUMBER_OF_SCRATCH_REGISTERS); + uint32_t 
savedEnd = SLJIT_S0; + + if (variable.info & Instruction::FloatOperandMarker) { + prefix = "F"; + savedStart = SLJIT_FR(SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS); + savedEnd = SLJIT_FS0; + } + + uint32_t reg1 = static_cast(VARIABLE_GET_REF(variable.value)); + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (SLJIT_IS_REG_PAIR(reg1)) { + uint32_t reg2 = reg1 >> 8; + reg1 &= 0xff; + + printf(" (%s%c%d,%s%c%d) [%d-%d]", prefix, reg1 >= savedStart ? 'S' : 'R', reg1 >= savedStart ? savedEnd - reg1 : reg1 - SLJIT_R0, + prefix, reg2 >= savedStart ? 'S' : 'R', reg2 >= savedStart ? savedEnd - reg2 : reg2 - SLJIT_R0, + static_cast(variable.u.rangeStart), static_cast(variable.rangeEnd)); + } else { +#endif /* SLJIT_32BIT_ARCHITECTURE */ + printf(" (%s%c%d) [%d-%d]", prefix, reg1 >= savedStart ? 'S' : 'R', reg1 >= savedStart ? savedEnd - reg1 : reg1 - SLJIT_R0, + static_cast(variable.u.rangeStart), static_cast(variable.rangeEnd)); +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ } else if (enableColors) { printf(" (I:%p)", VARIABLE_GET_IMM(variable.value)); } else { @@ -369,6 +462,28 @@ void JITCompiler::dump() printf("\n"); operand++; } + + bool firstTmp = true; + for (size_t i = 0; i < 4; ++i) { + uint8_t reg = instr->requiredReg(i); + + if (reg == 0) { + continue; + } + + if (firstTmp) { + printf(" Temporary("); + firstTmp = false; + } else { + printf(","); + } + + printf("%d:r%d", static_cast(i), static_cast(reg - 1)); + } + + if (!firstTmp) { + printf(")\n"); + } } else { printf("%s%d%s: Label", labelText, static_cast(item->id()), defaultText); diff --git a/src/jit/IntMath32Inl.h b/src/jit/IntMath32Inl.h index 157c85932..704a72f13 100644 --- a/src/jit/IntMath32Inl.h +++ b/src/jit/IntMath32Inl.h @@ -43,6 +43,16 @@ struct JITArgPair { void JITArgPair::set(Operand* operand) { if (VARIABLE_TYPE(operand->ref) != Operand::Immediate) { + if (VARIABLE_TYPE(operand->ref) == Operand::Register) { + 
sljit_sw regs = VARIABLE_GET_REF(operand->ref); + + this->arg1 = regs & 0xff; + this->arg1w = 0; + this->arg2 = regs >> 8; + this->arg2w = 0; + return; + } + sljit_sw offset = static_cast(VARIABLE_GET_OFFSET(operand->ref)); this->arg1 = SLJIT_MEM1(kFrameReg); @@ -62,35 +72,6 @@ void JITArgPair::set(Operand* operand) this->arg2w = static_cast(value64 >> 32); } -static void emitStoreImmediate(sljit_compiler* compiler, sljit_sw offset, Instruction* instr) -{ - switch (instr->opcode()) { -#ifdef HAS_SIMD - case ByteCode::Const128Opcode: { - const uint8_t* value = reinterpret_cast(instr->byteCode())->value(); - - sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, SLJIT_TMP_DEST_FREG, SLJIT_MEM0(), (sljit_sw)value); - sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, SLJIT_TMP_DEST_FREG, SLJIT_MEM1(kFrameReg), offset); - return; - } -#endif /* HAS_SIMD */ - case ByteCode::Const32Opcode: { - uint32_t value32 = reinterpret_cast(instr->byteCode())->value(); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset, SLJIT_IMM, static_cast(value32)); - return; - } - default: { - ASSERT(instr->opcode() == ByteCode::Const64Opcode); - - uint64_t value64 = reinterpret_cast(instr->byteCode())->value(); - - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset + WORD_LOW_OFFSET, SLJIT_IMM, static_cast(value64)); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset + WORD_HIGH_OFFSET, SLJIT_IMM, static_cast(value64 >> 32)); - return; - } - } -} - static void emitDivRem32(sljit_compiler* compiler, sljit_s32 opcode, JITArg* args) { CompileContext* context = CompileContext::get(compiler); @@ -105,9 +86,7 @@ static void emitDivRem32(sljit_compiler* compiler, sljit_s32 opcode, JITArg* arg } } - MOVE_TO_REG(compiler, SLJIT_MOV, SLJIT_R1, args[1].arg, args[1].argw); - MOVE_TO_REG(compiler, SLJIT_MOV, SLJIT_R0, args[0].arg, args[0].argw); - + emitInitR0R1(compiler, SLJIT_MOV, 
SLJIT_MOV, args); sljit_jump* moduloJumpFrom = nullptr; if (SLJIT_IS_IMM(args[1].arg)) { @@ -154,16 +133,22 @@ static void emitSimpleBinary64(sljit_compiler* compiler, sljit_s32 op1, sljit_s3 sljit_s32 dst0 = instr->requiredReg(0); sljit_s32 dst1 = instr->requiredReg(1); - for (int i = 0; i < 2; i++) { - if (SLJIT_IS_MEM(args[i].arg1)) { - sljit_emit_op1(compiler, SLJIT_MOV, dst0, 0, args[i].arg1, args[i].arg1w); - sljit_emit_op1(compiler, SLJIT_MOV, dst1, 0, args[i].arg2, args[i].arg2w); - - args[i].arg1 = dst0; - args[i].arg1w = 0; - args[i].arg2 = dst1; - args[i].arg2w = 0; - break; + if (args[0].arg1 != dst0 && args[1].arg1 != dst0) { + ASSERT(args[0].arg1 != dst1 && args[1].arg1 != dst1 + && args[0].arg1 != dst0 && args[0].arg2 != dst1 + && args[1].arg2 != dst0 && args[1].arg2 != dst1); + + for (int i = 0; i < 2; i++) { + if (SLJIT_IS_MEM(args[i].arg1)) { + sljit_emit_op1(compiler, SLJIT_MOV, dst0, 0, args[i].arg1, args[i].arg1w); + sljit_emit_op1(compiler, SLJIT_MOV, dst1, 0, args[i].arg2, args[i].arg2w); + + args[i].arg1 = dst0; + args[i].arg1w = 0; + args[i].arg2 = dst1; + args[i].arg2w = 0; + break; + } } } @@ -346,19 +331,15 @@ static void emitRotate64(sljit_compiler* compiler, sljit_s32 op, Instruction* in static void emitMul64(sljit_compiler* compiler, Instruction* instr, JITArgPair* args) { - sljit_s32 tmpReg = instr->requiredReg(2); - - ASSERT(instr->requiredReg(0) == SLJIT_R0 && instr->requiredReg(1) == SLJIT_R1); + sljit_s32 tmpReg = instr->requiredReg(0); - if (args[1].arg1 == SLJIT_R0 || (!SLJIT_IS_IMM(args[1].arg1) && SLJIT_IS_IMM(args[0].arg1))) { + if (!SLJIT_IS_IMM(args[1].arg1) && SLJIT_IS_IMM(args[0].arg1)) { // Swap arguments. 
JITArgPair tmp = args[0]; args[0] = args[1]; args[1] = tmp; } - ASSERT(args[1].arg1 != SLJIT_R0 && args[1].arg2 != SLJIT_R1); - if (SLJIT_IS_IMM(args[1].arg1)) { if (args[1].arg1w == 0) { sljit_emit_op2(compiler, SLJIT_MUL, args[2].arg2, args[2].arg2w, args[0].arg1, args[0].arg1w, SLJIT_IMM, args[1].arg2w); @@ -376,43 +357,75 @@ static void emitMul64(sljit_compiler* compiler, Instruction* instr, JITArgPair* return; } - MOVE_TO_REG(compiler, SLJIT_MOV, SLJIT_R0, args[0].arg1, args[0].arg1w); + sljit_s32 baseReg = SLJIT_R0; + sljit_s32 immReg = SLJIT_R1; - if (SLJIT_IS_MEM(args[0].arg2)) { + if (args[0].arg1 == SLJIT_R1) { + baseReg = SLJIT_R1; + immReg = SLJIT_R0; + } else { + MOVE_TO_REG(compiler, SLJIT_MOV, baseReg, args[0].arg1, args[0].arg1w); + } + + if (SLJIT_IS_MEM(args[0].arg2) || args[0].arg2 == immReg) { sljit_emit_op1(compiler, SLJIT_MOV, tmpReg, 0, args[0].arg2, args[0].arg2w); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, args[1].arg1w); - sljit_emit_op2(compiler, SLJIT_MUL, tmpReg, 0, tmpReg, 0, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, immReg, 0, SLJIT_IMM, args[1].arg1w); + sljit_emit_op2(compiler, SLJIT_MUL, tmpReg, 0, tmpReg, 0, immReg, 0); } else { - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, args[1].arg1w); - sljit_emit_op2(compiler, SLJIT_MUL, tmpReg, 0, args[0].arg2, args[0].arg2w, SLJIT_R1, 0); + sljit_emit_op1(compiler, SLJIT_MOV, immReg, 0, SLJIT_IMM, args[1].arg1w); + sljit_emit_op2(compiler, SLJIT_MUL, tmpReg, 0, args[0].arg2, args[0].arg2w, immReg, 0); } if (args[1].arg2w == 1) { - sljit_emit_op2(compiler, SLJIT_ADD, tmpReg, 0, tmpReg, 0, SLJIT_R0, 0); + sljit_emit_op2(compiler, SLJIT_ADD, tmpReg, 0, tmpReg, 0, baseReg, 0); } else if (args[1].arg2w != 0) { - sljit_emit_op2r(compiler, SLJIT_MULADD, tmpReg, SLJIT_R0, 0, SLJIT_IMM, args[1].arg2w); + sljit_emit_op2r(compiler, SLJIT_MULADD, tmpReg, baseReg, 0, SLJIT_IMM, args[1].arg2w); + } + } else { + if (args[1].arg1 == SLJIT_R0 || args[1].arg2 == 
SLJIT_R0) { + // Swap arguments. + JITArgPair tmp = args[0]; + args[0] = args[1]; + args[1] = tmp; } - sljit_emit_op0(compiler, SLJIT_LMUL_UW); - sljit_emit_op2(compiler, SLJIT_ADD, args[2].arg2, args[2].arg2w, SLJIT_R1, 0, tmpReg, 0); - MOVE_FROM_REG(compiler, SLJIT_MOV, args[2].arg1, args[2].arg1w, SLJIT_R0); - return; - } + sljit_s32 lowReg0 = SLJIT_R0; + sljit_s32 lowReg1 = SLJIT_R1; + sljit_s32 firstIndex = 0; - MOVE_TO_REG(compiler, SLJIT_MOV, SLJIT_R0, args[0].arg1, args[0].arg1w); + if (args[0].arg2 == SLJIT_R0) { + lowReg0 = SLJIT_R1; + lowReg1 = SLJIT_R0; + } else { + MOVE_TO_REG(compiler, SLJIT_MOV, SLJIT_R0, args[0].arg1, args[0].arg1w); + } - if (args[0].arg2 == SLJIT_R1 || SLJIT_IS_MEM(args[0].arg2)) { - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, args[0].arg2, args[0].arg2w); - args[0].arg2 = SLJIT_TMP_DEST_REG; - args[0].arg2w = 0; + if (args[1].arg2 == lowReg1) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, args[1].arg2, args[1].arg2w); + args[1].arg2 = SLJIT_TMP_DEST_REG; + args[1].arg2w = 0; + firstIndex = 1; + } else if (args[0].arg2 == lowReg1 || SLJIT_IS_MEM(args[0].arg2)) { + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, args[0].arg2, args[0].arg2w); + args[0].arg2 = SLJIT_TMP_DEST_REG; + args[0].arg2w = 0; + } + + MOVE_TO_REG(compiler, SLJIT_MOV, lowReg1, args[1].arg1, args[1].arg1w); + sljit_emit_op2(compiler, SLJIT_MUL, tmpReg, 0, args[firstIndex].arg2, args[firstIndex].arg2w, lowReg1, 0); + + firstIndex ^= 0x1; + if (args[firstIndex].arg2 != lowReg1) { + sljit_emit_op2r(compiler, SLJIT_MULADD, tmpReg, args[firstIndex].arg2, args[firstIndex].arg2w, lowReg0, 0); + } else { + sljit_emit_op2(compiler, SLJIT_SHL, tmpReg, 0, tmpReg, 0, SLJIT_IMM, 1); + } } - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, args[1].arg1, args[1].arg1w); - sljit_emit_op2(compiler, SLJIT_MUL, tmpReg, 0, args[0].arg2, args[0].arg2w, SLJIT_R1, 0); - sljit_emit_op2r(compiler, SLJIT_MULADD, tmpReg, args[1].arg2, 
args[1].arg2w, SLJIT_R0, 0); sljit_emit_op0(compiler, SLJIT_LMUL_UW); - sljit_emit_op2(compiler, SLJIT_ADD, args[2].arg2, args[2].arg2w, SLJIT_R1, 0, tmpReg, 0); + sljit_emit_op2(compiler, SLJIT_ADD, tmpReg, 0, tmpReg, 0, SLJIT_R1, 0); MOVE_FROM_REG(compiler, SLJIT_MOV, args[2].arg1, args[2].arg1w, SLJIT_R0); + MOVE_FROM_REG(compiler, SLJIT_MOV, args[2].arg2, args[2].arg2w, tmpReg); } static sljit_sw signedDiv64(int64_t* dividend, int64_t* divisor, int64_t* quotient) @@ -1180,12 +1193,12 @@ static void emitGlobalGet64(sljit_compiler* compiler, Instruction* instr) { CompileContext* context = CompileContext::get(compiler); GlobalGet64* globalGet = reinterpret_cast(instr->byteCode()); - sljit_s32 baseReg = (instr->info() & Instruction::kIsGlobalFloatBit) ? SLJIT_TMP_MEM_REG : instr->requiredReg(0); + sljit_s32 baseReg = (instr->info() & Instruction::kHasFloatOperand) ? SLJIT_TMP_MEM_REG : instr->requiredReg(0); sljit_emit_op1(compiler, SLJIT_MOV_P, baseReg, 0, SLJIT_MEM1(kContextReg), OffsetOfContextField(instance)); sljit_emit_op1(compiler, SLJIT_MOV, baseReg, 0, SLJIT_MEM1(baseReg), context->globalsStart + globalGet->index() * sizeof(void*)); - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { JITArg dstArg(instr->operands()); moveFloatToDest(compiler, SLJIT_MOV_F64, dstArg, JITFieldAccessor::globalValueOffset()); return; @@ -1212,7 +1225,7 @@ static void emitGlobalSet64(sljit_compiler* compiler, Instruction* instr) GlobalSet32* globalSet = reinterpret_cast(instr->byteCode()); - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { JITArg src; floatOperandToArg(compiler, instr->operands(), src, SLJIT_TMP_DEST_FREG); sljit_s32 baseReg = SLJIT_TMP_MEM_REG; diff --git a/src/jit/IntMath64Inl.h b/src/jit/IntMath64Inl.h index f63a6428e..56c1f32c4 100644 --- a/src/jit/IntMath64Inl.h +++ b/src/jit/IntMath64Inl.h @@ -16,33 +16,6 @@ /* Only included by 
jit-backend.cc */ -static void emitStoreImmediate(sljit_compiler* compiler, sljit_sw offset, Instruction* instr) -{ - switch (instr->opcode()) { -#ifdef HAS_SIMD - case ByteCode::Const128Opcode: { - const uint8_t* value = reinterpret_cast(instr->byteCode())->value(); - - sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, SLJIT_TMP_DEST_FREG, SLJIT_MEM0(), (sljit_sw)value); - sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, SLJIT_TMP_DEST_FREG, SLJIT_MEM1(kFrameReg), offset); - return; - } -#endif /* HAS_SIMD */ - case ByteCode::Const32Opcode: { - uint32_t value32 = reinterpret_cast(instr->byteCode())->value(); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(kFrameReg), offset, SLJIT_IMM, static_cast(value32)); - return; - } - default: { - ASSERT(instr->opcode() == ByteCode::Const64Opcode); - - uint64_t value64 = reinterpret_cast(instr->byteCode())->value(); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(kFrameReg), offset, SLJIT_IMM, static_cast(value64)); - return; - } - } -} - enum DivRemOptions : sljit_s32 { DivRem32 = 1 << 1, DivRemSigned = 1 << 0, @@ -64,8 +37,7 @@ static void emitDivRem(sljit_compiler* compiler, sljit_s32 opcode, JITArg* args, } } - MOVE_TO_REG(compiler, movOpcode, SLJIT_R1, args[1].arg, args[1].argw); - MOVE_TO_REG(compiler, movOpcode, SLJIT_R0, args[0].arg, args[0].argw); + emitInitR0R1(compiler, SLJIT_MOV, SLJIT_MOV, args); sljit_jump* moduloJumpFrom = nullptr; @@ -566,7 +538,7 @@ static void emitGlobalGet64(sljit_compiler* compiler, Instruction* instr) sljit_emit_op1(compiler, SLJIT_MOV_P, SLJIT_TMP_MEM_REG, 0, SLJIT_MEM1(kContextReg), OffsetOfContextField(instance)); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_MEM1(SLJIT_TMP_MEM_REG), context->globalsStart + globalGet->index() * sizeof(void*)); - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { moveFloatToDest(compiler, 
SLJIT_MOV_F64, dstArg, JITFieldAccessor::globalValueOffset()); } else { moveIntToDest(compiler, SLJIT_MOV, dstArg, JITFieldAccessor::globalValueOffset()); @@ -580,7 +552,7 @@ static void emitGlobalSet64(sljit_compiler* compiler, Instruction* instr) JITArg src; sljit_s32 baseReg; - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { floatOperandToArg(compiler, instr->operands(), src, SLJIT_TMP_DEST_FREG); baseReg = SLJIT_TMP_MEM_REG; } else { @@ -591,7 +563,7 @@ static void emitGlobalSet64(sljit_compiler* compiler, Instruction* instr) sljit_emit_op1(compiler, SLJIT_MOV, baseReg, 0, SLJIT_MEM1(kContextReg), OffsetOfContextField(instance)); if (SLJIT_IS_MEM(src.arg)) { - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_TMP_DEST_FREG, 0, src.arg, src.argw); src.arg = SLJIT_TMP_DEST_FREG; } else { @@ -603,7 +575,7 @@ static void emitGlobalSet64(sljit_compiler* compiler, Instruction* instr) sljit_emit_op1(compiler, SLJIT_MOV, baseReg, 0, SLJIT_MEM1(baseReg), context->globalsStart + globalSet->index() * sizeof(void*)); - if (instr->info() & Instruction::kIsGlobalFloatBit) { + if (instr->info() & Instruction::kHasFloatOperand) { sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(baseReg), JITFieldAccessor::globalValueOffset(), src.arg, src.argw); } else { sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(baseReg), JITFieldAccessor::globalValueOffset(), src.arg, src.argw); diff --git a/src/jit/MemoryInl.h b/src/jit/MemoryInl.h index a6c5fe778..3bf765bd2 100644 --- a/src/jit/MemoryInl.h +++ b/src/jit/MemoryInl.h @@ -226,11 +226,11 @@ static void emitLoad(sljit_compiler* compiler, Instruction* instr) switch (instr->opcode()) { case ByteCode::Load32Opcode: - opcode = SLJIT_MOV32; + opcode = (instr->info() & Instruction::kHasFloatOperand) ? 
SLJIT_MOV_F32 : SLJIT_MOV32; size = 4; break; case ByteCode::Load64Opcode: - opcode = SLJIT_MOV; + opcode = (instr->info() & Instruction::kHasFloatOperand) ? SLJIT_MOV_F64 : SLJIT_MOV; size = 8; break; case ByteCode::I32LoadOpcode: @@ -567,11 +567,11 @@ static void emitStore(sljit_compiler* compiler, Instruction* instr) switch (instr->opcode()) { case ByteCode::Store32Opcode: - opcode = SLJIT_MOV32; + opcode = (instr->info() & Instruction::kHasFloatOperand) ? SLJIT_MOV_F32 : SLJIT_MOV32; size = 4; break; case ByteCode::Store64Opcode: - opcode = SLJIT_MOV; + opcode = (instr->info() & Instruction::kHasFloatOperand) ? SLJIT_MOV_F64 : SLJIT_MOV; size = 8; break; case ByteCode::I32StoreOpcode: @@ -883,13 +883,7 @@ static void emitMemory(sljit_compiler* compiler, Instruction* instr) case ByteCode::MemoryFillOpcode: { ASSERT(instr->info() & Instruction::kIsCallback); - JITArg srcArg; - - for (int i = 0; i < 3; i++) { - srcArg.set(params + i); - sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R(i), 0, srcArg.arg, srcArg.argw); - } - + emitInitR0R1R2(compiler, SLJIT_MOV32, params); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, kContextReg, 0); sljit_sw addr; diff --git a/src/jit/RegisterAlloc.cpp b/src/jit/RegisterAlloc.cpp index 49be423bf..820110e62 100644 --- a/src/jit/RegisterAlloc.cpp +++ b/src/jit/RegisterAlloc.cpp @@ -23,13 +23,581 @@ namespace Walrus { +#if 1 + +class RegisterSet { +public: + RegisterSet(uint32_t numberOfScratchRegs, uint32_t numberOfSavedRegs, bool isInteger); + + void reserve(uint8_t reg) { m_registers[reg].rangeEnd = kReservedReg; } + + void updateVariable(uint8_t reg, VariableList::Variable* variable) + { + ASSERT(m_registers[reg].rangeEnd != kUnassignedReg && m_registers[reg].variable != nullptr); + m_registers[reg].variable = variable; + } + + uint8_t getSavedRegCount() { return static_cast(m_usedSavedRegisters - m_savedStartIndex); } + + uint8_t toCPUReg(uint8_t reg); + bool check(int8_t reg, uint16_t constraints); + void 
freeUnusedRegisters(size_t id); + uint8_t allocateRegister(VariableList::Variable* variable); +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + uint8_t allocateRegisterPair(VariableList::Variable* variable, uint8_t* otherReg); + + void setDestroysR0R1() + { + m_regStatus |= kDestroysR0R1; + } + + void clearDestroysR0R1() + { + m_regStatus = static_cast(m_regStatus & ~kDestroysR0R1); + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + uint8_t allocateQuadRegister(VariableList::Variable* variable); +#endif /* SLJIT_CONFIG_ARM_32 */ + +private: + static const uint8_t kIsInteger = 1 << 0; + static const uint8_t kDestroysR0R1 = 1 << 1; + + static const size_t kReservedReg = 0; + static const size_t kUnassignedReg = ~(size_t)0; + + struct RegisterInfo { + RegisterInfo() + : rangeEnd(kUnassignedReg) + , variable(nullptr) + { + } + + size_t rangeEnd; + VariableList::Variable* variable; + }; + + // Free registers. + uint8_t m_regStatus; + uint8_t m_savedStartIndex; + uint8_t m_usedSavedRegisters; + + // Allocated registers. 
+ std::vector m_registers; +}; + +class RegisterFile { +public: + RegisterFile(uint32_t numberOfIntegerScratchRegs, uint32_t numberOfIntegerSavedRegs, + uint32_t numberOfFloatScratchRegs, uint32_t numberOfFloatSavedRegs) + : m_integerSet(numberOfIntegerScratchRegs, numberOfIntegerSavedRegs, true) + , m_floatSet(numberOfFloatScratchRegs, numberOfFloatSavedRegs, false) + { + } + + RegisterSet& integerSet() { return m_integerSet; } + RegisterSet& floatSet() { return m_floatSet; } + + uint8_t toCPUIntegerReg(uint8_t reg) + { + return m_integerSet.toCPUReg(reg); + } + + uint8_t toCPUFloatReg(uint8_t reg) + { + return m_floatSet.toCPUReg(reg); + } + + void integerReserve(uint8_t reg) + { + m_integerSet.reserve(reg); + } + + void floatReserve(uint8_t reg) + { + m_floatSet.reserve(reg); + } + + void allocateVariable(VariableList::Variable* variable) + { + uint8_t type = variable->info & Instruction::TypeMask; + ASSERT(type > 0); + + if (type & Instruction::FloatOperandMarker) { +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + if ((variable->info & Instruction::TypeMask) == Instruction::V128Operand) { + m_floatSet.allocateQuadRegister(variable); + return; + } +#endif /* SLJIT_CONFIG_ARM_32 */ + m_floatSet.allocateRegister(variable); + return; + } + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (type == Instruction::Int64Operand) { + m_integerSet.allocateRegisterPair(variable, nullptr); + return; + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + m_integerSet.allocateRegister(variable); + } + + void freeUnusedRegisters(size_t id) + { + m_integerSet.freeUnusedRegisters(id); + m_floatSet.freeUnusedRegisters(id); + } + + bool reuseResult(uint8_t type, VariableList::Variable** reusableRegs, VariableList::Variable* resultVariable); + +private: + RegisterSet m_integerSet; + RegisterSet m_floatSet; +}; + +RegisterSet::RegisterSet(uint32_t numberOfScratchRegs, uint32_t numberOfSavedRegs, bool isInteger) + : m_regStatus(isInteger ? 
kIsInteger : 0) + , m_savedStartIndex(numberOfScratchRegs) + , m_usedSavedRegisters(numberOfScratchRegs) +{ + m_registers.resize(numberOfScratchRegs + numberOfSavedRegs); +} + +uint8_t RegisterSet::toCPUReg(uint8_t reg) +{ + if (reg < m_savedStartIndex) { + return SLJIT_R0 + reg; + } + + uint8_t base = (m_regStatus & kIsInteger) ? SLJIT_S2 : SLJIT_FS0; + return base - (reg - m_savedStartIndex); +} + +bool RegisterSet::check(int8_t reg, uint16_t constraints) +{ + if (constraints & VariableList::kIsCallback) { + return reg >= m_savedStartIndex; + } + + if ((m_regStatus & kIsInteger) && (constraints & VariableList::kDestroysR0R1)) { + return reg >= 2; + } + + return true; +} + +void RegisterSet::freeUnusedRegisters(size_t id) +{ + size_t size = m_registers.size(); + + for (size_t i = 0; i < size; i++) { + RegisterInfo& info = m_registers[i]; + + if (info.rangeEnd == kReservedReg) { + if (info.variable == nullptr) { + info.rangeEnd = kUnassignedReg; + continue; + } + + info.rangeEnd = info.variable->rangeEnd; + } + + if (info.rangeEnd < id) { + info.rangeEnd = kUnassignedReg; + info.variable = nullptr; + } + } +} + +uint8_t RegisterSet::allocateRegister(VariableList::Variable* variable) +{ + size_t maxRangeEnd = 0; + size_t maxRangeIndex = 0; + uint16_t constraints = variable != nullptr ? 
variable->info : 0; + size_t size = m_registers.size(); + size_t i = 0; + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (m_regStatus & kDestroysR0R1) { + constraints = VariableList::kDestroysR0R1; + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + if (constraints & VariableList::kIsCallback) { + i = m_savedStartIndex; + } else if ((constraints & VariableList::kDestroysR0R1) && (m_regStatus & kIsInteger)) { + i = 2; + } + + while (i < size) { + if (m_registers[i].rangeEnd == kUnassignedReg) { + break; + } + + if (m_registers[i].rangeEnd > maxRangeEnd) { + maxRangeEnd = m_registers[i].rangeEnd; + maxRangeIndex = i; + } + + i++; + } + + if (i == size) { + ASSERT(maxRangeEnd != 0 || variable != nullptr); + + if (variable != nullptr && variable->rangeEnd >= maxRangeEnd) { + return VariableList::kUnusedReg; + } + + // Move variable into memory. + i = maxRangeIndex; + ASSERT(m_registers[i].variable != nullptr); + + VariableList::Variable* prevVariable = m_registers[i].variable; + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (prevVariable->reg2 != prevVariable->reg1) { + size_t other = (i == prevVariable->reg1) ? prevVariable->reg2 : prevVariable->reg1; + + m_registers[other].rangeEnd = kUnassignedReg; + m_registers[other].variable = nullptr; + } + prevVariable->reg2 = VariableList::kUnusedReg; +#endif /* SLJIT_32BIT_ARCHITECTURE */ + prevVariable->reg1 = VariableList::kUnusedReg; + } + + // Allocated registers are also reserved for the current byte code. 
+ m_registers[i].rangeEnd = kReservedReg; + m_registers[i].variable = variable; + + if (variable != nullptr) { + variable->reg1 = i; +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + variable->reg2 = i; +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } + + if (i >= m_usedSavedRegisters) { + m_usedSavedRegisters = i + 1; + } + + return i; +} + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +uint8_t RegisterSet::allocateRegisterPair(VariableList::Variable* variable, uint8_t* otherReg) +{ + size_t maxRangeEndSingle1 = 0; + size_t maxRangeIndexSingle1 = 0; + size_t maxRangeEndSingle2 = 0; + size_t maxRangeIndexSingle2 = 0; + size_t maxRangeEndPair = 0; + size_t maxRangeIndexPair = 0; + size_t freeReg = VariableList::kUnusedReg; + uint16_t constraints = variable != nullptr ? variable->info : 0; + size_t size = m_registers.size(); + size_t i = 0; + + if (constraints & VariableList::kIsCallback) { + i = m_savedStartIndex; + } else if ((constraints & VariableList::kDestroysR0R1) && (m_regStatus & kIsInteger)) { + i = 2; + } + + while (i < size) { + if (m_registers[i].rangeEnd == kUnassignedReg) { + if (freeReg != VariableList::kUnusedReg) { + break; + } + + freeReg = i; + } else if (m_registers[i].variable != nullptr) { + VariableList::Variable* targetVariable = m_registers[i].variable; + + if (targetVariable->reg1 != targetVariable->reg2) { + if (targetVariable->rangeEnd > maxRangeEndPair) { + maxRangeEndPair = targetVariable->rangeEnd; + maxRangeIndexPair = i; + } + } else if (targetVariable->rangeEnd > maxRangeEndSingle1) { + maxRangeEndSingle2 = maxRangeEndSingle1; + maxRangeIndexSingle2 = maxRangeIndexSingle1; + maxRangeEndSingle1 = targetVariable->rangeEnd; + maxRangeIndexSingle1 = i; + } else if (targetVariable->rangeEnd > maxRangeEndSingle2) { + maxRangeEndSingle2 = targetVariable->rangeEnd; + maxRangeIndexSingle2 = i; + } + } + + i++; + } + + ASSERT(maxRangeEndSingle2 <= maxRangeEndSingle1); + + if (i == size) { + if 
(maxRangeEndPair == 0 + && (maxRangeEndSingle1 == 0 || (maxRangeEndSingle2 == 0 && freeReg == VariableList::kUnusedReg))) { + ASSERT(variable != nullptr); + return VariableList::kUnusedReg; + } + + size_t maxRangeEnd; + + if (freeReg != VariableList::kUnusedReg) { + if (maxRangeEndSingle1 >= maxRangeEndPair) { + i = maxRangeIndexSingle1; + maxRangeEnd = maxRangeEndSingle1; + } else { + i = maxRangeIndexPair; + maxRangeEnd = maxRangeEndPair; + } + } else if (maxRangeEndPair < maxRangeEndSingle1 && maxRangeEndPair < maxRangeEndSingle2) { + i = maxRangeIndexSingle1; + maxRangeEnd = maxRangeEndSingle2; + freeReg = maxRangeIndexSingle2; + + VariableList::Variable* prevVariable = m_registers[freeReg].variable; + prevVariable->reg1 = VariableList::kUnusedReg; + prevVariable->reg2 = VariableList::kUnusedReg; + } else { + i = maxRangeIndexPair; + maxRangeEnd = maxRangeEndPair; + + VariableList::Variable* targetVariable = m_registers[i].variable; + freeReg = (targetVariable->reg1 != i) ? targetVariable->reg1 : targetVariable->reg2; + } + + if (variable != nullptr && variable->rangeEnd >= maxRangeEnd) { + return VariableList::kUnusedReg; + } + + // Move variable into memory. + VariableList::Variable* prevVariable = m_registers[i].variable; + ASSERT(prevVariable != nullptr); + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (prevVariable->reg2 != prevVariable->reg1) { + size_t other = (i == prevVariable->reg1) ? prevVariable->reg2 : prevVariable->reg1; + + ASSERT(other != VariableList::kUnusedReg); + m_registers[other].rangeEnd = 0; + m_registers[other].variable = nullptr; + } + prevVariable->reg2 = VariableList::kUnusedReg; +#endif /* SLJIT_32BIT_ARCHITECTURE */ + prevVariable->reg1 = VariableList::kUnusedReg; + } + + ASSERT(i < size && freeReg < size); + + // Allocated registers are also reserved for the current byte code. 
+ m_registers[i].rangeEnd = kReservedReg; + m_registers[i].variable = variable; + m_registers[freeReg].rangeEnd = kReservedReg; + m_registers[freeReg].variable = variable; + + if (variable != nullptr) { + variable->reg1 = freeReg; +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + variable->reg2 = i; +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } + + if (i >= m_usedSavedRegisters) { + m_usedSavedRegisters = i + 1; + } + + if (freeReg >= m_usedSavedRegisters) { + m_usedSavedRegisters = freeReg + 1; + } + + if (otherReg != nullptr) { + *otherReg = i; + } + + return freeReg; +} +#endif /* SLJIT_32BIT_ARCHITECTURE */ + +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) +uint8_t RegisterSet::allocateQuadRegister(VariableList::Variable* variable) +{ + size_t maxRangeEnd = 0; + size_t maxRangeIndex = 0; + uint16_t constraints = variable != nullptr ? variable->info : 0; + size_t size = m_registers.size(); + size_t i = 0; + + ASSERT(!(m_regStatus & kIsInteger) && (size & 0x1) == 0); + + if (constraints & VariableList::kIsCallback) { + i = m_savedStartIndex; + } + + while (i < size) { + if (m_registers[i].rangeEnd == kUnassignedReg) { + if (m_registers[i + 1].rangeEnd == kUnassignedReg) { + break; + } + + if (m_registers[i + 1].rangeEnd > maxRangeEnd) { + maxRangeEnd = m_registers[i + 1].rangeEnd; + maxRangeIndex = i; + } + } else if (m_registers[i + 1].rangeEnd == kUnassignedReg) { + if (m_registers[i].rangeEnd > maxRangeEnd) { + maxRangeEnd = m_registers[i].rangeEnd; + maxRangeIndex = i; + } + } else { + size_t averageEnd = ((m_registers[i].rangeEnd + m_registers[i + 1].rangeEnd) + 1) >> 1; + + if (averageEnd > maxRangeEnd) { + maxRangeEnd = averageEnd; + maxRangeIndex = i; + } + } + + i += 2; + } + + if (i == size) { + ASSERT(maxRangeEnd != 0 || variable != nullptr); + + if (variable != nullptr && variable->rangeEnd >= maxRangeEnd) { + return VariableList::kUnusedReg; + } + + // Move variable into memory. 
+ i = maxRangeIndex; + ASSERT(m_registers[i].variable != nullptr); + + VariableList::Variable* prevVariable = m_registers[i].variable; + ASSERT(prevVariable->reg2 == prevVariable->reg1 || prevVariable->reg2 == prevVariable->reg1 + 1); + prevVariable->reg1 = VariableList::kUnusedReg; + prevVariable->reg2 = VariableList::kUnusedReg; + + prevVariable = m_registers[i + 1].variable; + ASSERT(prevVariable->reg2 == prevVariable->reg1 || prevVariable->reg2 == prevVariable->reg1 + 1); + prevVariable->reg1 = VariableList::kUnusedReg; + prevVariable->reg2 = VariableList::kUnusedReg; + } + + // Allocated registers are also reserved for the current byte code. + m_registers[i].rangeEnd = kReservedReg; + m_registers[i].variable = variable; + m_registers[i + 1].rangeEnd = kReservedReg; + m_registers[i + 1].variable = variable; + + if (variable != nullptr) { + variable->reg1 = i; + variable->reg2 = i + 1; + } + + if (i + 1 >= static_cast(m_usedSavedRegisters)) { + m_usedSavedRegisters = i + 2; + } + + return i; +} +#endif /* SLJIT_CONFIG_ARM_32 */ + +static inline int reuseTemporary(uint8_t type, VariableList::Variable** reusableRegs) +{ + if (!(type & (Instruction::Src0Allowed | Instruction::Src1Allowed | Instruction::Src2Allowed))) { + return -1; + } + + for (uint32_t i = 0; i < 3; i++) { + if ((type & (Instruction::Src0Allowed << i)) && reusableRegs[i] != nullptr) { + return i; + } + } + + return -1; +} + +bool RegisterFile::reuseResult(uint8_t type, VariableList::Variable** reusableRegs, VariableList::Variable* resultVariable) +{ + if (!(type & (Instruction::Src0Allowed | Instruction::Src1Allowed | Instruction::Src2Allowed))) { + return false; + } + + uint16_t constraints = resultVariable->info; + RegisterSet& registers = (type & Instruction::FloatOperandMarker) ? 
m_floatSet : m_integerSet; +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + bool isInt64 = (type & Instruction::TypeMask) == Instruction::Int64Operand; +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + for (uint32_t i = 0; i < 3; i++) { + VariableList::Variable* variable = reusableRegs[i]; + if ((type & (Instruction::Src0Allowed << i)) && variable != nullptr && registers.check(variable->reg1, constraints)) { +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (isInt64 && !registers.check(variable->reg2, constraints)) { + continue; + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + reusableRegs[i] = nullptr; + + resultVariable->reg1 = variable->reg1; + registers.updateVariable(resultVariable->reg1, resultVariable); + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + resultVariable->reg2 = variable->reg2; + registers.updateVariable(resultVariable->reg2, resultVariable); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + return true; + } + } + + return false; +} + void JITCompiler::allocateRegisters() { if (m_variableList == nullptr) { + m_savedIntegerRegCount = 0; + m_savedFloatRegCount = 0; return; } - // Dummy register allocator. 
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + const uint32_t numberOfscratchRegs = 3; + const uint32_t numberOfsavedRegs = 1; +#else /* !SLJIT_CONFIG_X86_32 */ + const uint32_t numberOfscratchRegs = SLJIT_NUMBER_OF_SCRATCH_REGISTERS; + const uint32_t numberOfsavedRegs = SLJIT_NUMBER_OF_SAVED_REGISTERS - 2; +#endif /* SLJIT_CONFIG_X86_32 */ + + RegisterFile regs(numberOfscratchRegs, numberOfsavedRegs, + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); + + size_t variableListParamCount = m_variableList->paramCount; + for (size_t i = 0; i < variableListParamCount; i++) { + VariableList::Variable* variable = m_variableList->variables.data() + i; + + ASSERT(!(variable->info & VariableList::kIsImmediate)); + + if (variable->info & VariableList::kIsMerged) { + continue; + } + + ASSERT(variable->u.rangeStart == 0); + + if (variable->rangeEnd == 0) { + continue; + } + + regs.allocateVariable(variable); + } + for (InstructionListItem* item = m_first; item != nullptr; item = item->next()) { if (item->isLabel()) { continue; @@ -37,26 +605,322 @@ void JITCompiler::allocateRegisters() Instruction* instr = item->asInstruction(); Operand* operand = instr->operands(); - Operand* end = operand + instr->paramCount() + instr->resultCount(); - const uint8_t* list = instr->getOperandDescriptor(); + const uint8_t* list = m_variableList->getOperandDescriptor(instr); uint32_t paramCount = instr->paramCount(); - uint32_t resultCount = instr->resultCount(); + size_t instrId = instr->id(); + bool hasResult = instr->resultCount() > 0; + + regs.freeUnusedRegisters(instrId + ((instr->info() & Instruction::kFreeUnusedEarly) ? 1 : 0)); + operand = instr->operands(); + instr->setRequiredRegsDescriptor(0); + + if (*list == 0) { + // No register assignment required. 
+ ASSERT(instr->opcode() == ByteCode::EndOpcode || instr->opcode() == ByteCode::ThrowOpcode + || instr->opcode() == ByteCode::CallOpcode || instr->opcode() == ByteCode::CallIndirectOpcode + || instr->opcode() == ByteCode::ElemDropOpcode || instr->opcode() == ByteCode::DataDropOpcode + || instr->opcode() == ByteCode::JumpOpcode || instr->opcode() == ByteCode::UnreachableOpcode); + + if (!hasResult) { + continue; + } + + operand += paramCount; + Operand* end = operand + instr->resultCount(); + + while (operand < end) { + VariableList::Variable* resultVariable = m_variableList->variables.data() + operand->ref; + + if (resultVariable->u.rangeStart == instrId && resultVariable->rangeEnd != instrId) { + regs.allocateVariable(resultVariable); + } + operand++; + } + continue; + } + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (instr->opcode() == ByteCode::I64MulOpcode) { + regs.integerSet().setDestroysR0R1(); + instr->setRequiredReg(0, regs.toCPUIntegerReg(regs.integerSet().allocateRegister(nullptr))); + regs.integerSet().clearDestroysR0R1(); + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + + // Step 1: check params first. Source registers which live range ends + // can be reallocated if the corresponding SrcXAllowed flag is set. 
+ ASSERT(paramCount <= 3 && instr->resultCount() <= 1); + VariableList::Variable* reusableRegs[3] = { nullptr }; uint32_t tmpIndex = 0; - uint32_t nextIntIndex = SLJIT_R0; - uint32_t nextFloatIndex = SLJIT_FR0; - while (operand < end) { - VariableRef ref = operand->ref; + for (uint32_t i = 0; i < paramCount; i++) { + ASSERT((*list & Instruction::TypeMask) >= Instruction::Int32Operand + && (*list & Instruction::TypeMask) <= Instruction::Float64Operand); - VariableList::Variable& variable = m_variableList->variables[ref]; + VariableList::Variable* variable = m_variableList->variables.data() + operand->ref; - ASSERT((variable.info & Instruction::TypeMask) > 0); - operand->ref = variable.value; + if (!(variable->info & VariableList::kIsImmediate) + && variable->rangeEnd == instrId + && variable->reg1 != VariableList::kUnusedReg) { + reusableRegs[i] = variable; + } + + ASSERT(!(*list & Instruction::TmpRequired) || (*list & Instruction::FloatOperandMarker)); + + if ((*list & Instruction::FloatOperandMarker) && !(*list & Instruction::TmpNotAllowed)) { + // Source registers are read-only. 
+ if ((*list & Instruction::TmpRequired) || (variable->info & VariableList::kIsImmediate)) { + uint8_t reg = variable->reg1; + + if (reg != VariableList::kUnusedReg) { + ASSERT(!(variable->info & VariableList::kIsImmediate)); + regs.floatReserve(reg); +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + if ((*list & Instruction::TypeMask) != Instruction::V128Operand) { + regs.floatReserve(reg + 1); + } +#endif /* SLJIT_CONFIG_ARM_32 */ + } else { +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + if ((*list & Instruction::TypeMask) == Instruction::V128Operand) { + reg = regs.floatSet().allocateQuadRegister(nullptr); + } else { +#endif /* SLJIT_CONFIG_ARM_32 */ + reg = regs.floatSet().allocateRegister(nullptr); +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + } +#endif /* SLJIT_CONFIG_ARM_32 */ + } + instr->setRequiredReg(tmpIndex, regs.toCPUFloatReg(reg)); + } + tmpIndex++; + } operand++; + list++; } - operand = instr->operands(); + if (instr->info() & Instruction::kIsMergeCompare) { + ASSERT((list[0] & Instruction::TypeMask) == Instruction::Int32Operand && list[1] == 0); + continue; + } + + // Step 2: reuse as many registers as possible. Reusing + // has limitations, which are described in the operand list. 
+ if (hasResult) { + VariableList::Variable* resultVariable = m_variableList->variables.data() + operand->ref; + uint8_t type = (*list & Instruction::TypeMask); + + if (resultVariable->u.rangeStart == instrId && resultVariable->rangeEnd != instrId) { + if (!regs.reuseResult(*list, reusableRegs, resultVariable)) { + regs.allocateVariable(resultVariable); + } + } + + if (type & Instruction::FloatOperandMarker) { + if (resultVariable->reg1 != VariableList::kUnusedReg) { + regs.floatReserve(resultVariable->reg1); +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + regs.floatReserve(resultVariable->reg2); +#endif /* SLJIT_CONFIG_ARM_32 */ + } + } else if (resultVariable->reg1 != VariableList::kUnusedReg) { + regs.integerReserve(resultVariable->reg1); +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + regs.integerReserve(resultVariable->reg2); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } + + if (*list & Instruction::TmpRequired) { + uint8_t resultReg = resultVariable->reg1; + + if (resultReg == VariableList::kUnusedReg) { + if (type & Instruction::FloatOperandMarker) { +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + if (type == Instruction::V128Operand) { + resultReg = regs.floatSet().allocateQuadRegister(nullptr); + } else { +#endif /* SLJIT_CONFIG_ARM_32 */ + resultReg = regs.floatSet().allocateRegister(nullptr); +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + } +#endif /* SLJIT_CONFIG_ARM_32 */ + resultReg = regs.toCPUFloatReg(resultReg); +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + } else if (type == Instruction::Int64Operand) { + uint8_t otherReg; + resultReg = regs.integerSet().allocateRegisterPair(nullptr, &otherReg); + instr->setRequiredReg(tmpIndex++, regs.toCPUIntegerReg(otherReg)); + resultReg = regs.toCPUIntegerReg(resultReg); +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } else { + resultReg = regs.integerSet().allocateRegister(nullptr); + resultReg = regs.toCPUIntegerReg(resultReg); + } + 
} else if (type & Instruction::FloatOperandMarker) { +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + if (type == Instruction::V128Operand) { + regs.floatReserve(resultReg + 1); + } +#endif /* SLJIT_CONFIG_ARM_32 */ + regs.floatReserve(resultReg); + resultReg = regs.toCPUFloatReg(resultReg); + } else { +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (type == Instruction::Int64Operand) { + instr->setRequiredReg(tmpIndex++, regs.toCPUIntegerReg(resultReg)); + resultReg = resultVariable->reg2; + ASSERT(resultReg != VariableList::kUnusedReg); + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + regs.integerReserve(resultReg); + resultReg = regs.toCPUIntegerReg(resultReg); + } + + instr->setRequiredReg(tmpIndex++, resultReg); + } + + list++; + } + + uint32_t reuseTmpIndex = tmpIndex; + + for (const uint8_t* nextType = list; *nextType != 0; nextType++) { + int reuseIdx = reuseTemporary(*nextType, reusableRegs); + + if (reuseIdx >= 0) { + VariableList::Variable* variable = reusableRegs[reuseIdx]; + // A register cannot be reused twice. + reusableRegs[reuseIdx] = nullptr; + + uint8_t reg = variable->reg1; + + if (*nextType & Instruction::FloatOperandMarker) { +#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + if ((*nextType & Instruction::TypeMask) == Instruction::V128Operand) { + regs.floatReserve(reg + 1); + } +#endif /* SLJIT_CONFIG_ARM_32 */ + + regs.floatReserve(reg); + instr->setRequiredReg(reuseTmpIndex, regs.toCPUFloatReg(reg)); + } else { + regs.integerReserve(reg); + instr->setRequiredReg(reuseTmpIndex, regs.toCPUIntegerReg(reg)); + } + } + + reuseTmpIndex++; + } + + // Step 3: initialize uninitialized temporary values. + for (; *list != 0; list++) { + // Assign temporary registers. 
+ ASSERT(((*list & Instruction::TypeMask) >= Instruction::Int32Operand || (*list & Instruction::TmpRequired)) + && (*list & Instruction::TypeMask) <= Instruction::Float64Operand); + + if (instr->requiredReg(tmpIndex) == 0) { + if (*list & Instruction::FloatOperandMarker) { + instr->setRequiredReg(tmpIndex, regs.toCPUFloatReg(regs.floatSet().allocateRegister(nullptr))); + } else { + instr->setRequiredReg(tmpIndex, regs.toCPUIntegerReg(regs.integerSet().allocateRegister(nullptr))); + } + } + + tmpIndex++; + } + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + // 64 bit shifts / rotates requires special handling. + if ((instr->info() & (Instruction::kIsShift | Instruction::kIs32Bit)) == Instruction::kIsShift) { + ASSERT(operand == instr->operands() + 2); + VariableList::Variable* variable = m_variableList->variables.data() + operand[-1].ref; + bool isImmediate = (variable->info & VariableList::kIsImmediate) != 0; + + if (instr->opcode() == ByteCode::I64RotlOpcode || instr->opcode() == ByteCode::I64RotrOpcode) { + instr->setRequiredReg(2, regs.toCPUIntegerReg(regs.integerSet().allocateRegister(nullptr))); + + if (!isImmediate) { + instr->setRequiredReg(3, regs.toCPUIntegerReg(regs.integerSet().allocateRegister(nullptr))); + } + } else if (!isImmediate) { + instr->setRequiredReg(2, regs.toCPUIntegerReg(regs.integerSet().allocateRegister(nullptr))); + } + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } + + m_savedIntegerRegCount = regs.integerSet().getSavedRegCount(); + m_savedFloatRegCount = regs.floatSet().getSavedRegCount(); + + // Insert stack inits before the offsets are destroyed. 
+ insertStackInitList(nullptr, 0, variableListParamCount); + + for (auto it : m_variableList->catchUpdates) { + insertStackInitList(it.handler, it.variableListStart, it.variableListSize); + } + + size_t size = m_variableList->variables.size(); + for (size_t i = 0; i < size; i++) { + VariableList::Variable& variable = m_variableList->variables[i]; + + if (variable.reg1 != VariableList::kUnusedReg) { + ASSERT(!(variable.info & (VariableList::kIsMerged | VariableList::kIsImmediate))); + uint8_t reg1; + + if (variable.info & Instruction::FloatOperandMarker) { + reg1 = regs.toCPUFloatReg(variable.reg1); + } else { + reg1 = regs.toCPUIntegerReg(variable.reg1); + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (variable.reg2 != variable.reg1) { + uint8_t reg2; + + if (variable.info & Instruction::FloatOperandMarker) { + reg2 = regs.toCPUFloatReg(variable.reg2); + } else { + reg2 = regs.toCPUIntegerReg(variable.reg2); + } + + variable.value = VARIABLE_SET(SLJIT_REG_PAIR(reg1, reg2), Operand::Register); + continue; + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + } + + variable.value = VARIABLE_SET(reg1, Operand::Register); + } + } +} + +#else + +void JITCompiler::allocateRegisters() +{ + m_savedIntegerRegCount = 0; + m_savedFloatRegCount = 0; + + if (m_variableList == nullptr) { + return; + } + + // Dummy register allocator. 
+ for (InstructionListItem* item = m_first; item != nullptr; item = item->next()) { + if (item->isLabel()) { + continue; + } + + Instruction* instr = item->asInstruction(); + Operand* operand = instr->operands(); + Operand* end = operand + instr->paramCount() + instr->resultCount(); + const uint8_t* list = m_variableList->getOperandDescriptor(instr); + uint32_t paramCount = instr->paramCount(); + uint32_t resultCount = instr->resultCount(); + uint32_t tmpIndex = 0; + uint32_t nextIntIndex = SLJIT_R0; + uint32_t nextFloatIndex = SLJIT_FR0; + instr->setRequiredRegsDescriptor(0); if (*list == 0) { @@ -68,15 +932,22 @@ void JITCompiler::allocateRegisters() continue; } +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) + if (instr->opcode() == ByteCode::I64MulOpcode) { + instr->setRequiredReg(0, SLJIT_R2); + } +#endif /* SLJIT_32BIT_ARCHITECTURE */ + for (uint32_t i = 0; i < paramCount; i++) { ASSERT((*list & Instruction::TypeMask) >= Instruction::Int32Operand && (*list & Instruction::TypeMask) <= Instruction::Float64Operand); - if ((*list & Instruction::TypeMask) >= Instruction::FloatOperandStart + if ((*list & Instruction::TypeMask & Instruction::FloatOperandMarker) && !(*list & Instruction::TmpNotAllowed)) { + VariableList::Variable& variable = m_variableList->variables[operand->ref]; + // Source registers are read-only. 
- if ((*list & Instruction::TmpRequired) - || (VARIABLE_TYPE(operand->ref) == Operand::Immediate)) { + if ((*list & Instruction::TmpRequired) || (variable.info & VariableList::kIsImmediate)) { instr->setRequiredReg(tmpIndex, nextFloatIndex++); #if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) if ((*list & Instruction::TypeMask) == Instruction::V128Operand) { @@ -112,9 +983,7 @@ void JITCompiler::allocateRegisters() #endif /* SLJIT_32BIT_ARCHITECTURE */ } - if ((*list & Instruction::TypeMask) < Instruction::FloatOperandStart) { - instr->setRequiredReg(tmpIndex, nextIntIndex++); - } else { + if (*list & Instruction::FloatOperandMarker) { instr->setRequiredReg(tmpIndex, nextFloatIndex++); #if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) if ((*list & Instruction::TypeMask) == Instruction::V128Operand) { @@ -122,6 +991,8 @@ void JITCompiler::allocateRegisters() nextFloatIndex++; } #endif /* SLJIT_CONFIG_ARM_32 */ + } else { + instr->setRequiredReg(tmpIndex, nextIntIndex++); } tmpIndex++; } @@ -130,7 +1001,8 @@ void JITCompiler::allocateRegisters() // 64 bit shifts / rotates requires special handling. 
if ((instr->info() & (Instruction::kIsShift | Instruction::kIs32Bit)) == Instruction::kIsShift) { ASSERT(operand == instr->operands() + 2); - bool isImmediate = (VARIABLE_TYPE(operand[-1].ref) == Operand::Immediate); + VariableList::Variable* variable = m_variableList->variables.data() + operand[-1].ref; + bool isImmediate = (variable->info & VariableList::kIsImmediate) != 0; if (instr->opcode() == ByteCode::I64RotlOpcode || instr->opcode() == ByteCode::I64RotrOpcode) { instr->setRequiredReg(2, SLJIT_R2); @@ -155,6 +1027,38 @@ void JITCompiler::allocateRegisters() ASSERT(resultCount == 0); } +} + +#endif + +void JITCompiler::freeVariables() +{ + for (InstructionListItem* item = m_first; item != nullptr; item = item->next()) { + if (item->isLabel()) { + continue; + } + + Instruction* instr = item->asInstruction(); + Operand* operand = instr->operands(); + Operand* end = operand + instr->paramCount() + instr->resultCount(); + uint16_t info = 0; + + while (operand < end) { + VariableRef ref = operand->ref; + + VariableList::Variable& variable = m_variableList->variables[ref]; + + ASSERT((variable.info & Instruction::TypeMask) > 0); + info |= variable.info; + + operand->ref = variable.value; + operand++; + } + + if (info & Instruction::FloatOperandMarker) { + instr->addInfo(Instruction::kHasFloatOperand); + } + } delete m_variableList; m_variableList = nullptr; diff --git a/src/jit/SimdInl.h b/src/jit/SimdInl.h index 97ff2b178..2f8b14faf 100644 --- a/src/jit/SimdInl.h +++ b/src/jit/SimdInl.h @@ -24,10 +24,10 @@ static void emitMoveV128(sljit_compiler* compiler, Instruction* instr) sljit_s32 dstReg = GET_TARGET_REG(dst.arg, SLJIT_TMP_DEST_FREG); - simdOperandToArg(compiler, operands + 0, src, SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_FREG); + simdOperandToArg(compiler, operands + 0, src, SLJIT_SIMD_REG_128, dstReg); if (SLJIT_IS_MEM(dst.arg)) { - sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_FREG, dst.arg, dst.argw); + 
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128, dstReg, dst.arg, dst.argw); } } diff --git a/src/jit/TableInl.h b/src/jit/TableInl.h index 0358c6f20..87a12dd78 100644 --- a/src/jit/TableInl.h +++ b/src/jit/TableInl.h @@ -18,13 +18,7 @@ static void emitLoad3Arguments(sljit_compiler* compiler, Operand* params) { - JITArg srcArg; - - for (int i = 0; i < 3; i++) { - srcArg.set(params + i); - MOVE_TO_REG(compiler, SLJIT_MOV32, SLJIT_R(i), srcArg.arg, srcArg.argw); - } - + emitInitR0R1R2(compiler, SLJIT_MOV32, params); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, kContextReg, 0); } @@ -153,19 +147,15 @@ static void emitTable(sljit_compiler* compiler, Instruction* instr) case ByteCode::TableGrowOpcode: { ASSERT(instr->info() & Instruction::kIsCallback); - JITArg arg(instr->operands()); - MOVE_TO_REG(compiler, SLJIT_MOV32, SLJIT_R0, arg.arg, arg.argw); + Operand* operands = instr->operands(); + JITArg args[3] = { operands, operands + 1, operands + 2 }; - arg.set(instr->operands() + 1); + emitInitR0R1(compiler, SLJIT_MOV32, SLJIT_MOV_P, args); - MOVE_TO_REG(compiler, SLJIT_MOV_P, SLJIT_R1, arg.arg, arg.argw); sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_IMM, ((reinterpret_cast(instr->byteCode()))->tableIndex())); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, kContextReg, 0); sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, P, 32, 32, W), SLJIT_IMM, GET_FUNC_ADDR(sljit_sw, growTable)); - - arg.set(instr->operands() + 2); - - MOVE_FROM_REG(compiler, SLJIT_MOV32, arg.arg, arg.argw, SLJIT_R0); + MOVE_FROM_REG(compiler, SLJIT_MOV32, args[2].arg, args[2].argw, SLJIT_R0); break; } case ByteCode::TableSetOpcode: {