Skip to content

Commit

Permalink
Merge pull request #2043 from lioncash/vumin
Browse files Browse the repository at this point in the history
IR: Handle 256-bit VSMin/VUMin
  • Loading branch information
Sonicadvance1 committed Sep 29, 2022
2 parents 64c4fdc + aef801b commit aa6a499
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 70 deletions.
30 changes: 18 additions & 12 deletions External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
Expand Up @@ -738,43 +738,49 @@ DEF_OP(VNot) {
}

// Interpreter handler for the IR VUMin op. It fetches the two source vectors,
// computes a per-element std::min over unsigned element types into Tmp, and
// then copies OpSize bytes of Tmp to GDP.
// NOTE(review): this span is a rendered diff hunk. Removed and added lines sit
// side by side without +/- markers (see the paired `Op`, `Tmp`, `Elements`,
// `Func`, `switch` and `default` lines), so it will not compile as shown.
DEF_OP(VUMin) {
auto Op = IROp->C<IR::IROp_VUMin>();
const auto Op = IROp->C<IR::IROp_VUMin>();
// Width of the whole operation; used below as the byte count for memcpy.
const uint8_t OpSize = IROp->Size;

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
// Scratch buffer for the result. One declaration is a fixed 16 bytes, the
// other is sized by XMM_AVX_REG_SIZE (presumably 32 bytes so that 256-bit
// operations fit; confirm against Core::CPUState).
uint8_t Tmp[16];
uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

const uint8_t Elements = OpSize / Op->Header.ElementSize;
const auto Func = [](auto a, auto b) { return std::min(a, b); };
const uint8_t ElementSize = Op->Header.ElementSize;
// Number of elements in the vector: total width divided by element width.
const uint8_t Elements = OpSize / ElementSize;

switch (Op->Header.ElementSize) {
// Generic lambda; the element type named in each DO_VECTOR_OP line below is
// what makes the comparison unsigned.
const auto Func = [](auto a, auto b) { return std::min(a, b); };
switch (ElementSize) {
// DO_VECTOR_OP is defined outside this view. Presumably it expands to a
// `case <size>:` that applies Func to each element of Src1/Src2 and stores
// into Tmp, using Elements as the loop bound; verify against the macro.
DO_VECTOR_OP(1, uint8_t, Func)
DO_VECTOR_OP(2, uint16_t, Func)
DO_VECTOR_OP(4, uint32_t, Func)
DO_VECTOR_OP(8, uint64_t, Func)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
// Publish the result: OpSize bytes are copied from the scratch buffer to GDP.
memcpy(GDP, Tmp, OpSize);
}

// Interpreter handler for the IR VSMin op. Same shape as the unsigned variant:
// fetch both sources, compute a per-element std::min into Tmp, copy OpSize
// bytes to GDP. The element types listed in the switch are signed.
// NOTE(review): this span is a rendered diff hunk. Removed and added lines sit
// side by side without +/- markers (see the paired `Op`, `Tmp`, `Elements`,
// `Func`, `switch` and `default` lines), so it will not compile as shown.
DEF_OP(VSMin) {
auto Op = IROp->C<IR::IROp_VSMin>();
const auto Op = IROp->C<IR::IROp_VSMin>();
// Width of the whole operation; used below as the byte count for memcpy.
const uint8_t OpSize = IROp->Size;

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
// Scratch buffer for the result. One declaration is a fixed 16 bytes, the
// other is sized by XMM_AVX_REG_SIZE (presumably 32 bytes so that 256-bit
// operations fit; confirm against Core::CPUState).
uint8_t Tmp[16];
uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

const uint8_t Elements = OpSize / Op->Header.ElementSize;
const auto Func = [](auto a, auto b) { return std::min(a, b); };
const uint8_t ElementSize = Op->Header.ElementSize;
// Number of elements in the vector: total width divided by element width.
const uint8_t Elements = OpSize / ElementSize;

switch (Op->Header.ElementSize) {
// Generic lambda; the signed element type named in each DO_VECTOR_OP line
// below is what makes the comparison signed.
const auto Func = [](auto a, auto b) { return std::min(a, b); };
switch (ElementSize) {
// DO_VECTOR_OP is defined outside this view. Presumably it expands to a
// `case <size>:` that applies Func to each element of Src1/Src2 and stores
// into Tmp, using Elements as the loop bound; verify against the macro.
DO_VECTOR_OP(1, int8_t, Func)
DO_VECTOR_OP(2, int16_t, Func)
DO_VECTOR_OP(4, int32_t, Func)
DO_VECTOR_OP(8, int64_t, Func)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
// Publish the result: OpSize bytes are copied from the scratch buffer to GDP.
memcpy(GDP, Tmp, OpSize);
}
Expand Down
166 changes: 126 additions & 40 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
Expand Up @@ -1815,54 +1815,140 @@ DEF_OP(VNot) {
}

// Arm64 JIT emitter for the IR VUMin op (unsigned per-element minimum).
// Two code paths are visible: a predicated SVE path chosen when the host has
// SVE, the op is 256 bits wide and it is not scalar, and a 128-bit path using
// the V16B/V8H/V4S/V2D arrangements for everything else.
// NOTE(review): this span is a rendered diff hunk. Lines from the old
// single-switch implementation and the new two-path implementation are
// interleaved without +/- markers, so it will not compile as shown.
DEF_OP(VUMin) {
auto Op = IROp->C<IR::IROp_VUMin>();
switch (Op->Header.ElementSize) {
case 1: {
umin(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
}
case 2: {
umin(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
}
case 4: {
umin(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
const auto Op = IROp->C<IR::IROp_VUMin>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
// Scalar means the op covers exactly one element.
const auto IsScalar = ElementSize == OpSize;
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

// NOTE(review): a 256-bit op on a host without SVE fails this condition and
// falls through to the 128-bit branch below; confirm that is intended or
// ruled out elsewhere.
if (HostSupportsSVE && Is256Bit && !IsScalar) {
// PRED_TMP_32B is defined outside this view; presumably a predicate covering
// 32 bytes of elements. Verify.
const auto Pred = PRED_TMP_32B.Merging();

// SVE UMIN is a destructive operation so we need a temporary.
mov(VTMP1.Z().VnD(), Vector1.Z().VnD());

// Each case accumulates min(VTMP1, Vector2) into VTMP1 at the matching
// element width.
switch (ElementSize) {
case 1: {
umin(VTMP1.Z().VnB(), Pred, VTMP1.Z().VnB(), Vector2.Z().VnB());
break;
}
case 2: {
umin(VTMP1.Z().VnH(), Pred, VTMP1.Z().VnH(), Vector2.Z().VnH());
break;
}
case 4: {
umin(VTMP1.Z().VnS(), Pred, VTMP1.Z().VnS(), Vector2.Z().VnS());
break;
}
case 8: {
umin(VTMP1.Z().VnD(), Pred, VTMP1.Z().VnD(), Vector2.Z().VnD());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
// Returns instead of breaking, so the copy into Dst below is skipped for an
// unknown element size. The 128-bit branch's default uses `break` instead.
return;
}
case 8: {
cmhi(VTMP1.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D());
mov(VTMP2.V2D(), GetSrc(Op->Vector1.ID()).V2D());
bif(VTMP2.V16B(), GetSrc(Op->Vector2.ID()).V16B(), VTMP1.V16B());
mov(GetDst(Node).V2D(), VTMP2.V2D());
break;

// Move the accumulated result from the temporary into the destination.
mov(Dst.Z().VnD(), VTMP1.Z().VnD());
} else {
switch (ElementSize) {
case 1: {
umin(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
break;
}
case 2: {
umin(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
break;
}
case 4: {
umin(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
break;
}
case 8: {
// No direct umin is emitted for 64-bit elements here. The sequence builds a
// mask of elements where Vector2 > Vector1 (unsigned), starts from Vector1,
// and (per BIF's insert-if-false semantics, confirm against the Arm ISA
// reference) takes Vector2 where the mask is clear.
cmhi(VTMP1.V2D(), Vector2.V2D(), Vector1.V2D());
mov(VTMP2.V2D(), Vector1.V2D());
bif(VTMP2.V16B(), Vector2.V16B(), VTMP1.V16B());
mov(Dst.V2D(), VTMP2.V2D());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
}

// Arm64 JIT emitter for the IR VSMin op (signed per-element minimum).
// Two code paths are visible: a predicated SVE path chosen when the host has
// SVE, the op is 256 bits wide and it is not scalar, and a 128-bit path using
// the V16B/V8H/V4S/V2D arrangements for everything else.
// NOTE(review): this span is a rendered diff hunk. Lines from the old
// single-switch implementation and the new two-path implementation are
// interleaved without +/- markers, so it will not compile as shown.
DEF_OP(VSMin) {
auto Op = IROp->C<IR::IROp_VSMin>();
switch (Op->Header.ElementSize) {
case 1: {
smin(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
}
case 2: {
smin(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
}
case 4: {
smin(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
const auto Op = IROp->C<IR::IROp_VSMin>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
// Scalar means the op covers exactly one element.
const auto IsScalar = ElementSize == OpSize;
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

// NOTE(review): a 256-bit op on a host without SVE fails this condition and
// falls through to the 128-bit branch below; confirm that is intended or
// ruled out elsewhere.
if (HostSupportsSVE && Is256Bit && !IsScalar) {
// PRED_TMP_32B is defined outside this view; presumably a predicate covering
// 32 bytes of elements. Verify.
const auto Pred = PRED_TMP_32B.Merging();

// SVE SMIN is a destructive operation, so we need a temporary.
mov(VTMP1.Z().VnD(), Vector1.Z().VnD());

// Each case accumulates min(VTMP1, Vector2) into VTMP1 at the matching
// element width.
switch (ElementSize) {
case 1: {
smin(VTMP1.Z().VnB(), Pred, VTMP1.Z().VnB(), Vector2.Z().VnB());
break;
}
case 2: {
smin(VTMP1.Z().VnH(), Pred, VTMP1.Z().VnH(), Vector2.Z().VnH());
break;
}
case 4: {
smin(VTMP1.Z().VnS(), Pred, VTMP1.Z().VnS(), Vector2.Z().VnS());
break;
}
case 8: {
smin(VTMP1.Z().VnD(), Pred, VTMP1.Z().VnD(), Vector2.Z().VnD());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
// Returns instead of breaking, so the copy into Dst below is skipped for an
// unknown element size. The 128-bit branch's default uses `break` instead.
return;
}
case 8: {
cmgt(VTMP1.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D());
mov(VTMP2.V2D(), GetSrc(Op->Vector1.ID()).V2D());
bif(VTMP2.V16B(), GetSrc(Op->Vector2.ID()).V16B(), VTMP1.V16B());
mov(GetDst(Node).V2D(), VTMP2.V2D());
break;

// Move the accumulated result from the temporary into the destination.
mov(Dst.Z().VnD(), VTMP1.Z().VnD());
} else {
switch (ElementSize) {
case 1: {
smin(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
break;
}
case 2: {
smin(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
break;
}
case 4: {
smin(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
break;
}
case 8: {
// No direct smin is emitted for 64-bit elements here. The sequence builds a
// mask of elements where Vector2 > Vector1 (signed), starts from Vector1,
// and (per BIF's insert-if-false semantics, confirm against the Arm ISA
// reference) takes Vector2 where the mask is clear.
cmgt(VTMP1.V2D(), Vector2.V2D(), Vector1.V2D());
mov(VTMP2.V2D(), Vector1.V2D());
bif(VTMP2.V16B(), Vector2.V16B(), VTMP1.V16B());
mov(Dst.V2D(), VTMP2.V2D());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
}

Expand Down
62 changes: 44 additions & 18 deletions External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp
Expand Up @@ -1094,56 +1094,82 @@ DEF_OP(VNot) {
}

// x86_64 JIT emitter for the IR VUMin op (unsigned per-element minimum).
// A scalar path (op size equals element size) goes through general-purpose
// registers; the vector path emits vpminub/vpminuw/vpminud.
// NOTE(review): this span is a rendered diff hunk. Removed and added lines are
// interleaved without +/- markers (each instruction appears once with
// GetDst/GetSrc operands and once with the named locals), so it will not
// compile as shown.
DEF_OP(VUMin) {
auto Op = IROp->C<IR::IROp_VUMin>();
if (Op->Header.Size == Op->Header.ElementSize) {
switch (Op->Header.ElementSize) {
const auto Op = IROp->C<IR::IROp_VUMin>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
const auto IsScalar = OpSize == ElementSize;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

if (IsScalar) {
// NOTE(review): only the 8-byte scalar case is handled; any other scalar
// element size reaches the default and is logged as unknown.
switch (ElementSize) {
case 8: {
// This isn't very nice on x86 until AVX-512
pextrq(TMP1, GetSrc(Op->Vector1.ID()), 0);
pextrq(TMP2, GetSrc(Op->Vector2.ID()), 0);
pextrq(TMP1, Vector1, 0);
pextrq(TMP2, Vector2, 0);
// Unsigned compare; cmovb replaces TMP2 with TMP1 when TMP1 is below TMP2,
// leaving the smaller value in TMP2.
cmp(TMP1, TMP2);
cmovb(TMP2, TMP1);
pinsrq(GetDst(Node), TMP2, 0);
pinsrq(Dst, TMP2, 0);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}
else {
switch (Op->Header.ElementSize) {
// ToYMM is defined outside this view; presumably it yields the 256-bit
// (YMM) view of the same register. The YMM operands are used whatever
// OpSize is; confirm that is acceptable for 128-bit operations.
const auto DstYMM = ToYMM(Dst);
const auto Vector1YMM = ToYMM(Vector1);
const auto Vector2YMM = ToYMM(Vector2);

// NOTE(review): there is no case for 8-byte elements in the vector path, so
// a non-scalar op with ElementSize 8 reaches the default and is only logged.
switch (ElementSize) {
case 1: {
vpminub(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpminub(DstYMM, Vector1YMM, Vector2YMM);
break;
}
case 2: {
vpminuw(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpminuw(DstYMM, Vector1YMM, Vector2YMM);
break;
}
case 4: {
vpminud(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpminud(DstYMM, Vector1YMM, Vector2YMM);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}
}

// x86_64 JIT emitter for the IR VSMin op (signed per-element minimum).
// It dispatches on element size to vpminsb/vpminsw/vpminsd.
// NOTE(review): this span is a rendered diff hunk. Removed and added lines are
// interleaved without +/- markers (each instruction appears once with
// GetDst/GetSrc operands and once with the named locals), so it will not
// compile as shown.
// NOTE(review): the operation size is not read in this handler and there is no
// scalar path; 8-byte elements reach the default and are only logged.
DEF_OP(VSMin) {
auto Op = IROp->C<IR::IROp_VSMin>();
switch (Op->Header.ElementSize) {
const auto Op = IROp->C<IR::IROp_VSMin>();

const auto ElementSize = Op->Header.ElementSize;

// ToYMM is defined outside this view; presumably it yields the 256-bit
// (YMM) view of the same register. The YMM operands are used for every
// operation size; confirm that is acceptable for 128-bit operations.
const auto Dst = ToYMM(GetDst(Node));
const auto Vector1 = ToYMM(GetSrc(Op->Vector1.ID()));
const auto Vector2 = ToYMM(GetSrc(Op->Vector2.ID()));

switch (ElementSize) {
case 1: {
vpminsb(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpminsb(Dst, Vector1, Vector2);
break;
}
case 2: {
vpminsw(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpminsw(Dst, Vector1, Vector2);
break;
}
case 4: {
vpminsd(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpminsd(Dst, Vector1, Vector2);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

Expand Down

0 comments on commit aa6a499

Please sign in to comment.