Skip to content

Commit

Permalink
Fix simd shuffle operation on x86
Browse files Browse the repository at this point in the history
Signed-off-by: Zoltan Herczeg <zherczeg.u-szeged@partner.samsung.com>
  • Loading branch information
Zoltan Herczeg authored and clover2123 committed May 23, 2024
1 parent 29a1cd0 commit 3341dfe
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 26 deletions.
70 changes: 44 additions & 26 deletions src/jit/SimdX86Inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1828,44 +1828,62 @@ static void emitShuffleSIMD(sljit_compiler* compiler, Instruction* instr)
Operand* operands = instr->operands();
sljit_s32 tmp1 = instr->requiredReg(2);
sljit_s32 tmp2 = SLJIT_TMP_DEST_FREG;
const sljit_s32 type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
I8X16Shuffle* shuffle = reinterpret_cast<I8X16Shuffle*>(instr->byteCode());
JITArg args[3];

simdOperandToArg(compiler, operands, args[0], SLJIT_SIMD_ELEM_128, instr->requiredReg(0));
simdOperandToArg(compiler, operands + 1, args[1], SLJIT_SIMD_ELEM_128, instr->requiredReg(1));

args[2].set(operands + 2);
sljit_s32 dst = GET_TARGET_REG(args[2].arg, instr->requiredReg(1));

I8X16Shuffle* shuffle = reinterpret_cast<I8X16Shuffle*>(instr->byteCode());
const sljit_s32 type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, tmp1, SLJIT_MEM0(), reinterpret_cast<sljit_sw>(shuffle->value()));
if (operands[0].ref == operands[1].ref) {
simdOperandToArg(compiler, operands, args[0], SLJIT_SIMD_ELEM_128, dst);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_TMP_DEST_FREG, SLJIT_MEM0(), reinterpret_cast<sljit_sw>(shuffle->value()));

sljit_emit_simd_replicate(compiler, SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8, tmp2, SLJIT_IMM, 0xf0);
simdEmitSSEOp(compiler, SimdOp::paddb, tmp1, tmp2);
if (args[0].arg != dst) {
if (sljit_has_cpu_feature(SLJIT_HAS_AVX)) {
simdEmitVexOp(compiler, SimdOp::pshufb, dst, args[1].arg, tmp1);
} else {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, dst, args[0].arg, 0);
args[1].arg = dst;
}
}

if (dst != args[1].arg) {
if (sljit_has_cpu_feature(SLJIT_HAS_AVX)) {
simdEmitVexOp(compiler, SimdOp::pshufb, dst, args[1].arg, tmp1);
} else {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, dst, args[1].arg, 0);
args[1].arg = dst;
if (dst == args[0].arg) {
simdEmitSSEOp(compiler, SimdOp::pshufb, dst, SLJIT_TMP_DEST_FREG);
}
}
} else {
simdOperandToArg(compiler, operands, args[0], SLJIT_SIMD_ELEM_128, instr->requiredReg(0));
simdOperandToArg(compiler, operands + 1, args[1], SLJIT_SIMD_ELEM_128, instr->requiredReg(1));

if (dst == args[1].arg) {
simdEmitSSEOp(compiler, SimdOp::pshufb, dst, tmp1);
}
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, tmp1, SLJIT_MEM0(), reinterpret_cast<sljit_sw>(shuffle->value()));

simdEmitSSEOp(compiler, SimdOp::pxor, tmp1, tmp2);
sljit_emit_simd_replicate(compiler, SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8, tmp2, SLJIT_IMM, 0xf0);
simdEmitSSEOp(compiler, SimdOp::paddb, tmp1, tmp2);

if (sljit_has_cpu_feature(SLJIT_HAS_AVX)) {
simdEmitVexOp(compiler, SimdOp::pshufb, tmp2, args[0].arg, tmp1);
} else {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, tmp2, args[0].arg, 0);
simdEmitSSEOp(compiler, SimdOp::pshufb, tmp2, tmp1);
}
if (dst != args[1].arg) {
if (sljit_has_cpu_feature(SLJIT_HAS_AVX)) {
simdEmitVexOp(compiler, SimdOp::pshufb, dst, args[1].arg, tmp1);
} else {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, dst, args[1].arg, 0);
args[1].arg = dst;
}
}

if (dst == args[1].arg) {
simdEmitSSEOp(compiler, SimdOp::pshufb, dst, tmp1);
}

simdEmitSSEOp(compiler, SimdOp::pxor, tmp1, tmp2);

simdEmitSSEOp(compiler, SimdOp::por, dst, tmp2);
if (sljit_has_cpu_feature(SLJIT_HAS_AVX)) {
simdEmitVexOp(compiler, SimdOp::pshufb, tmp2, args[0].arg, tmp1);
} else {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, tmp2, args[0].arg, 0);
simdEmitSSEOp(compiler, SimdOp::pshufb, tmp2, tmp1);
}

simdEmitSSEOp(compiler, SimdOp::por, dst, tmp2);
}

if (SLJIT_IS_MEM(args[2].arg)) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, dst, args[2].arg, args[2].argw);
Expand Down
10 changes: 10 additions & 0 deletions test/jit/simd-shuffle.wast
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@
v128.not
local.set 2
)

;; test3: shuffles a v128 with itself, i.e. both i8x16.shuffle operands are the
;; same local. Lane indices 0-15 pick from the first operand and 16-31 from the
;; second, so each index pair (30 14, 28 12, ...) selects the same input byte twice.
(func (export "test3") (param v128) (result v128)
local.get 0
local.get 0
i8x16.shuffle 30 14 28 12 26 10 24 8 22 6 20 4 18 2 16 0
)
)

(assert_return (invoke "test1"
Expand All @@ -39,3 +45,7 @@
(v128.const i64x2 0x0807060504030201 0x100f0e0d0c0b0a09)
(v128.const i64x2 0x1817161514131211 0x201f1e1d1c1b1a19))
(v128.const i64x2 0x19091b0b1d0d1f0f 0x1101130315051707))

;; With identical operands, test3 yields input bytes 14,14,12,12,...,0,0
;; (little-endian byte order): the even-indexed bytes, each duplicated, in descending order.
(assert_return (invoke "test3"
(v128.const i64x2 0x8182838485868788 0x2122232425262728))
(v128.const i64x2 0x2828262624242222 0x8888868684848282))

0 comments on commit 3341dfe

Please sign in to comment.