Skip to content

Commit

Permalink
Merge pull request #2163 from lioncash/movshdup
Browse files Browse the repository at this point in the history
OpcodeDispatcher: Handle VMOVSHDUP/VMOVSLDUP
  • Loading branch information
Sonicadvance1 committed Nov 22, 2022
2 parents bc20f1e + 2e93d2c commit 58f35ba
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 7 deletions.
2 changes: 2 additions & 0 deletions External/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5696,11 +5696,13 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {

{OPD(1, 0b00, 0x12), 1, &OpDispatchBuilder::VMOVLPOp},
{OPD(1, 0b01, 0x12), 1, &OpDispatchBuilder::VMOVLPOp},
{OPD(1, 0b10, 0x12), 1, &OpDispatchBuilder::VMOVSLDUPOp},
{OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::VMOVLPOp},
{OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::VMOVLPOp},

{OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::VMOVHPOp},
{OPD(1, 0b01, 0x16), 1, &OpDispatchBuilder::VMOVHPOp},
{OPD(1, 0b10, 0x16), 1, &OpDispatchBuilder::VMOVSHDUPOp},
{OPD(1, 0b00, 0x17), 1, &OpDispatchBuilder::VMOVHPOp},
{OPD(1, 0b01, 0x17), 1, &OpDispatchBuilder::VMOVHPOp},

Expand Down
3 changes: 3 additions & 0 deletions External/FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,9 @@ enum class SelectionFlag {
void VMOVHPOp(OpcodeArgs);
void VMOVLPOp(OpcodeArgs);

void VMOVSHDUPOp(OpcodeArgs);
void VMOVSLDUPOp(OpcodeArgs);

// X87 Ops
template<size_t width>
void FLD(OpcodeArgs);
Expand Down
36 changes: 31 additions & 5 deletions External/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,22 +139,48 @@ void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) {

// MOVSHDUP (128-bit SSE3 form): copies each odd-indexed 32-bit element of the
// source into the even-indexed lane directly below it, so that a source of
// [e0, e1, e2, e3] produces [e1, e1, e3, e3].
void OpDispatchBuilder::MOVSHDUPOp(OpcodeArgs) {
  OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 8);

  // Every insert starts from (a copy of) Src, so the odd lanes 1 and 3 already
  // hold their final values. Only the even lanes need rewriting; inserting
  // lane 3 into lane 3 or lane 1 into lane 1 would be a no-op and is omitted.
  // Index arguments appear to be (destination lane, source lane).
  OrderedNode *Result = _VInsElement(16, 4, 2, 3, Src, Src);  // lane 2 <- lane 3
  Result = _VInsElement(16, 4, 0, 1, Result, Src);            // lane 0 <- lane 1

  StoreResult(FPRClass, Op, Result, -1);
}

// VMOVSHDUP (VEX-encoded, 128-bit or 256-bit): duplicates every odd-indexed
// 32-bit element of the source into the even-indexed lane directly below it.
void OpDispatchBuilder::VMOVSHDUPOp(OpcodeArgs) {
  OrderedNode *Source = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  const auto VectorSize = GetSrcSize(Op);

  // Lower four elements, shared by both encodings. The odd lanes are already
  // in place because each insert builds on the unmodified source vector.
  OrderedNode *Duplicated = _VInsElement(VectorSize, 4, 2, 3, Source, Source);  // lane 2 <- lane 3
  Duplicated = _VInsElement(VectorSize, 4, 0, 1, Duplicated, Source);           // lane 0 <- lane 1

  // The 256-bit encoding repeats the same pattern on elements 4..7.
  if (VectorSize == Core::CPUState::XMM_AVX_REG_SIZE) {
    Duplicated = _VInsElement(VectorSize, 4, 4, 5, Duplicated, Source);         // lane 4 <- lane 5
    Duplicated = _VInsElement(VectorSize, 4, 6, 7, Duplicated, Source);         // lane 6 <- lane 7
  }

  // NOTE(review): the explicit 32 is presumably the store size in bytes, so the
  // 128-bit form clears the upper half of the destination (the unit test
  // expects zeros there) -- confirm against StoreResult_WithOpSize.
  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Duplicated, 32, -1);
}

// MOVSLDUP (128-bit SSE3 form): copies each even-indexed 32-bit element of the
// source into the odd-indexed lane directly above it, so that a source of
// [e0, e1, e2, e3] produces [e0, e0, e2, e2].
void OpDispatchBuilder::MOVSLDUPOp(OpcodeArgs) {
  OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, 8);

  // Every insert starts from (a copy of) Src, so the even lanes 0 and 2 already
  // hold their final values. Only the odd lanes need rewriting; inserting
  // lane 2 into lane 2 or lane 0 into lane 0 would be a no-op and is omitted.
  // Index arguments appear to be (destination lane, source lane).
  OrderedNode *Result = _VInsElement(16, 4, 3, 2, Src, Src);  // lane 3 <- lane 2
  Result = _VInsElement(16, 4, 1, 0, Result, Src);            // lane 1 <- lane 0

  StoreResult(FPRClass, Op, Result, -1);
}

// VMOVSLDUP (VEX-encoded, 128-bit or 256-bit): duplicates every even-indexed
// 32-bit element of the source into the odd-indexed lane directly above it.
void OpDispatchBuilder::VMOVSLDUPOp(OpcodeArgs) {
  OrderedNode *Source = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  const auto VectorSize = GetSrcSize(Op);

  // Lower four elements, shared by both encodings. The even lanes are already
  // in place because each insert builds on the unmodified source vector.
  OrderedNode *Duplicated = _VInsElement(VectorSize, 4, 3, 2, Source, Source);  // lane 3 <- lane 2
  Duplicated = _VInsElement(VectorSize, 4, 1, 0, Duplicated, Source);           // lane 1 <- lane 0

  // The 256-bit encoding repeats the same pattern on elements 4..7.
  if (VectorSize == Core::CPUState::XMM_AVX_REG_SIZE) {
    Duplicated = _VInsElement(VectorSize, 4, 5, 4, Duplicated, Source);         // lane 5 <- lane 4
    Duplicated = _VInsElement(VectorSize, 4, 7, 6, Duplicated, Source);         // lane 7 <- lane 6
  }

  // NOTE(review): the explicit 32 is presumably the store size in bytes, so the
  // 128-bit form clears the upper half of the destination (the unit test
  // expects zeros there) -- confirm against StoreResult_WithOpSize.
  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Duplicated, 32, -1);
}

void OpDispatchBuilder::MOVSSOp(OpcodeArgs) {
if (Op->Dest.IsGPR() && Op->Src[0].IsGPR()) {
// MOVSS xmm1, xmm2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ void InitializeVEXTables() {

{OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b01, 0x12), 1, X86InstInfo{"VMOVLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b10, 0x12), 1, X86InstInfo{"VMOVSLDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b10, 0x12), 1, X86InstInfo{"VMOVSLDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b11, 0x12), 1, X86InstInfo{"VMOVDDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},

{OPD(1, 0b00, 0x13), 1, X86InstInfo{"VMOVLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}},
Expand All @@ -43,7 +43,7 @@ void InitializeVEXTables() {

{OPD(1, 0b00, 0x16), 1, X86InstInfo{"VMOVHPS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b01, 0x16), 1, X86InstInfo{"VMOVHPD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC, 0, nullptr}},
{OPD(1, 0b10, 0x16), 1, X86InstInfo{"VMOVSHDUP", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(1, 0b10, 0x16), 1, X86InstInfo{"VMOVSHDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},

{OPD(1, 0b00, 0x17), 1, X86InstInfo{"VMOVHPS", TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(1, 0b01, 0x17), 1, X86InstInfo{"VMOVHPD", TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0, nullptr}},
Expand Down
41 changes: 41 additions & 0 deletions unittests/ASM/VEX/vmovshdup.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x0808080808080808"],
"XMM2": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x0808080809090909"],
"XMM4": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x0808080808080808"],
"XMM5": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x0808080808080808"],
"XMM7": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif

; Exercises VMOVSHDUP in its 256-bit (ymm) and 128-bit (xmm) forms with the
; source being the destination itself, a different register, and memory.
; Expected values live in the CONFIG block at the top of the file: each
; odd-indexed dword of the source appears in both halves of its qword, and the
; xmm forms leave the upper 128 bits of the destination register zero.

; rdx -> the 32-byte input vector defined at .data below
lea rdx, [rel .data]

;; Broadcast across self
; Source and destination are the same register.
vmovaps ymm1, [rdx]
vmovshdup ymm1, ymm1
; 128-bit version
vmovaps xmm2, [rdx]
vmovshdup xmm2, xmm2

;; Broadcast from different registers
; ymm3 is only read, so it must still hold the unmodified input afterwards.
vmovaps ymm3, [rdx]
vmovshdup ymm4, ymm3
; 128-bit version
vmovshdup xmm5, xmm3

;; Broadcast from memory
vmovshdup ymm6, [rdx]
; 128-bit version
vmovshdup xmm7, [rdx]

; End of test; presumably the harness compares registers against RegData here.
hlt

; 32-byte alignment keeps the aligned vmovaps ymm loads above valid.
align 32
.data:
; dwords 0..3: 0xFFFFFFFF, 0xEEEEEEEE, 0xDDDDDDDD, 0xCCCCCCCC
db 0xFF, 0xFF, 0xFF, 0xFF, 0xEE, 0xEE, 0xEE, 0xEE, 0xDD, 0xDD, 0xDD, 0xDD, 0xCC, 0xCC, 0xCC, 0xCC
; dwords 4..7: 0xBBBBBBBB, 0xAAAAAAAA, 0x09090909, 0x08080808
db 0xBB, 0xBB, 0xBB, 0xBB, 0xAA, 0xAA, 0xAA, 0xAA, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08
41 changes: 41 additions & 0 deletions unittests/ASM/VEX/vmovsldup.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0xBBBBBBBBBBBBBBBB", "0x0909090909090909"],
"XMM2": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x0808080809090909"],
"XMM4": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0xBBBBBBBBBBBBBBBB", "0x0909090909090909"],
"XMM5": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0xBBBBBBBBBBBBBBBB", "0x0909090909090909"],
"XMM7": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif

; Exercises VMOVSLDUP in its 256-bit (ymm) and 128-bit (xmm) forms with the
; source being the destination itself, a different register, and memory.
; Expected values live in the CONFIG block at the top of the file: each
; even-indexed dword of the source appears in both halves of its qword, and the
; xmm forms leave the upper 128 bits of the destination register zero.

; rdx -> the 32-byte input vector defined at .data below
lea rdx, [rel .data]

;; Broadcast across self
; Source and destination are the same register.
vmovaps ymm1, [rdx]
vmovsldup ymm1, ymm1
; 128-bit version
vmovaps xmm2, [rdx]
vmovsldup xmm2, xmm2

;; Broadcast from different registers
; ymm3 is only read, so it must still hold the unmodified input afterwards.
vmovaps ymm3, [rdx]
vmovsldup ymm4, ymm3
; 128-bit version
vmovsldup xmm5, xmm3

;; Broadcast from memory
vmovsldup ymm6, [rdx]
; 128-bit version
vmovsldup xmm7, [rdx]

; End of test; presumably the harness compares registers against RegData here.
hlt

; 32-byte alignment keeps the aligned vmovaps ymm loads above valid.
align 32
.data:
; dwords 0..3: 0xFFFFFFFF, 0xEEEEEEEE, 0xDDDDDDDD, 0xCCCCCCCC
db 0xFF, 0xFF, 0xFF, 0xFF, 0xEE, 0xEE, 0xEE, 0xEE, 0xDD, 0xDD, 0xDD, 0xDD, 0xCC, 0xCC, 0xCC, 0xCC
; dwords 4..7: 0xBBBBBBBB, 0xAAAAAAAA, 0x09090909, 0x08080808
db 0xBB, 0xBB, 0xBB, 0xBB, 0xAA, 0xAA, 0xAA, 0xAA, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08

0 comments on commit 58f35ba

Please sign in to comment.