Skip to content

Commit

Permalink
Merge pull request #2028 from lioncash/vfdiv
Browse files Browse the repository at this point in the history
IR: Handle 256-bit VFDiv
  • Loading branch information
Sonicadvance1 committed Sep 27, 2022
2 parents 4472265 + 6116ae5 commit 50eba40
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 45 deletions.
13 changes: 8 additions & 5 deletions External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
Expand Up @@ -545,20 +545,23 @@ DEF_OP(VFMul) {
}

// Interpreter handler for the VFDiv IR op: element-wise floating-point
// division of Vector1 by Vector2, for 4-byte (float) or 8-byte (double)
// elements. The result is staged in a local buffer and then copied out.
DEF_OP(VFDiv) {
  const auto Op = IROp->C<IR::IROp_VFDiv>();
  const uint8_t OpSize = IROp->Size;

  // Src1, Src2, Tmp, Elements and Func are not referenced directly below;
  // presumably DO_VECTOR_OP consumes them by name -- confirm against the
  // macro definition.
  void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
  void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);

  // Staging buffer sized by the AVX register constant rather than a fixed
  // 16 bytes, so that the OpSize-byte copy below stays in bounds for
  // operations wider than 128 bits.
  uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

  const uint8_t ElementSize = Op->Header.ElementSize;
  const uint8_t Elements = OpSize / ElementSize;

  const auto Func = [](auto a, auto b) { return a / b; };
  switch (ElementSize) {
    DO_VECTOR_OP(4, float, Func)
    DO_VECTOR_OP(8, double, Func)
    default:
      LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
      break;
  }

  // NOTE(review): GDP looks like the destination pointer for this op's
  // result -- confirm against its definition.
  memcpy(GDP, Tmp, OpSize);
}
Expand Down
88 changes: 67 additions & 21 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
Expand Up @@ -1093,34 +1093,80 @@ DEF_OP(VFMul) {
}

// Arm64 JIT emitter for the VFDiv IR op: floating-point division of Vector1
// by Vector2 with 2-, 4- or 8-byte elements.
//
// Three code paths are emitted depending on the operation shape:
//   * 32-byte, non-scalar operation on a host with SVE: predicated SVE fdiv.
//   * scalar (element size == operation size): scalar fdiv on H/S/D views.
//   * everything else: Advanced SIMD fdiv on V8H/V4S/V2D arrangements.
DEF_OP(VFDiv) {
  const auto Op = IROp->C<IR::IROp_VFDiv>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto IsScalar = ElementSize == OpSize;
  const auto Is256Bit = OpSize == 32;

  const auto Dst = GetDst(Node);
  const auto Vector1 = GetSrc(Op->Vector1.ID());
  const auto Vector2 = GetSrc(Op->Vector2.ID());

  if (HostSupportsSVE && Is256Bit && !IsScalar) {
    // The SVE divide used here is destructive (its destination is also its
    // first source), so the dividend is copied into a temporary first to
    // leave Vector1 untouched.
    mov(VTMP1.Z().VnD(), Vector1.Z().VnD());

    // NOTE(review): PRED_TMP_32B is presumably a predicate enabling the low
    // 32 bytes of the Z register -- confirm where it is set up.
    switch (ElementSize) {
      case 2: {
        fdiv(VTMP1.Z().VnH(), PRED_TMP_32B.Merging(),
             VTMP1.Z().VnH(), Vector2.Z().VnH());
        break;
      }
      case 4: {
        fdiv(VTMP1.Z().VnS(), PRED_TMP_32B.Merging(),
             VTMP1.Z().VnS(), Vector2.Z().VnS());
        break;
      }
      case 8: {
        fdiv(VTMP1.Z().VnD(), PRED_TMP_32B.Merging(),
             VTMP1.Z().VnD(), Vector2.Z().VnD());
        break;
      }
      default:
        LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
        // Return rather than break so the move into Dst below is not
        // emitted for an unsupported element size.
        return;
    }

    mov(Dst.Z().VnD(), VTMP1.Z().VnD());
  } else {
    if (IsScalar) {
      switch (ElementSize) {
        case 2: {
          fdiv(Dst.H(), Vector1.H(), Vector2.H());
          break;
        }
        case 4: {
          fdiv(Dst.S(), Vector1.S(), Vector2.S());
          break;
        }
        case 8: {
          fdiv(Dst.D(), Vector1.D(), Vector2.D());
          break;
        }
        default:
          LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
          break;
      }
    } else {
      // NOTE(review): a 32-byte operation on a host without SVE also lands
      // here and only gets a 128-bit arrangement -- confirm that case is
      // handled or excluded elsewhere.
      switch (ElementSize) {
        case 2: {
          fdiv(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
          break;
        }
        case 4: {
          fdiv(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
          break;
        }
        case 8: {
          fdiv(Dst.V2D(), Vector1.V2D(), Vector2.V2D());
          break;
        }
        default:
          LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
          break;
      }
    }
  }
}
Expand Down
50 changes: 31 additions & 19 deletions External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp
Expand Up @@ -727,35 +727,47 @@ DEF_OP(VFMul) {
}

// x86-64 JIT emitter for the VFDiv IR op: floating-point division of Vector1
// by Vector2 with 4-byte (single) or 8-byte (double) elements.
//
// A scalar operation (element size == operation size) emits vdivss/vdivsd on
// the registers as returned by GetDst/GetSrc; any other shape emits
// vdivps/vdivpd on the YMM views of those registers.
DEF_OP(VFDiv) {
  const auto Op = IROp->C<IR::IROp_VFDiv>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto IsScalar = ElementSize == OpSize;

  const auto Dst = GetDst(Node);
  const auto Vector1 = GetSrc(Op->Vector1.ID());
  const auto Vector2 = GetSrc(Op->Vector2.ID());

  if (IsScalar) {
    switch (ElementSize) {
      case 4: {
        vdivss(Dst, Vector1, Vector2);
        break;
      }
      case 8: {
        vdivsd(Dst, Vector1, Vector2);
        break;
      }
      default:
        LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
        break;
    }
  } else {
    // NOTE(review): the YMM form is used for every non-scalar size,
    // including 16-byte operations -- confirm the upper 128 bits are
    // don't-care for those callers.
    const auto DstYMM = ToYMM(Dst);
    const auto Vector1YMM = ToYMM(Vector1);
    const auto Vector2YMM = ToYMM(Vector2);

    switch (ElementSize) {
      case 4: {
        vdivps(DstYMM, Vector1YMM, Vector2YMM);
        break;
      }
      case 8: {
        vdivpd(DstYMM, Vector1YMM, Vector2YMM);
        break;
      }
      default:
        LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
        break;
    }
  }
}
Expand Down

0 comments on commit 50eba40

Please sign in to comment.