Skip to content

Commit

Permalink
Merge pull request #2044 from lioncash/vumax
Browse files Browse the repository at this point in the history
IR: Handle 256-bit VSMax/VUMax
  • Loading branch information
Sonicadvance1 committed Sep 29, 2022
2 parents aa6a499 + bf6f855 commit 8d8b029
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 64 deletions.
30 changes: 18 additions & 12 deletions External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
Expand Up @@ -786,43 +786,49 @@ DEF_OP(VSMin) {
}

// Interpreter handler for the VUMax IR op: combines Vector1 and Vector2 with
// std::max over unsigned integer element types and copies OpSize bytes of the
// result from a scratch buffer to GDP.
// NOTE(review): this listing looks like a diff rendered without +/- markers --
// several statements appear twice in a row (older form first, newer form
// second), e.g. the two `Op` declarations directly below. Confirm which side
// is current before trying to compile this text.
DEF_OP(VUMax) {
auto Op = IROp->C<IR::IROp_VUMax>();
const auto Op = IROp->C<IR::IROp_VUMax>();
// Width of the whole operation; used below as a byte count for memcpy.
const uint8_t OpSize = IROp->Size;

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
// Scratch buffer for the result. The second declaration sizes it by
// XMM_AVX_REG_SIZE rather than a fixed 16 bytes (presumably 32 bytes, i.e. a
// 256-bit register -- TODO confirm against Core::CPUState).
uint8_t Tmp[16];
uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

// Number of elements = operation width / element width. The newer form caches
// the element size in a local first.
const uint8_t Elements = OpSize / Op->Header.ElementSize;
const auto Func = [](auto a, auto b) { return std::max(a, b); };
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;

// Dispatch on element width. DO_VECTOR_OP is a macro defined outside this
// view; presumably it expands to a `case` that applies Func to each pair of
// elements of the given type from Src1/Src2 and writes into Tmp -- verify.
switch (Op->Header.ElementSize) {
const auto Func = [](auto a, auto b) { return std::max(a, b); };
switch (ElementSize) {
DO_VECTOR_OP(1, uint8_t, Func)
DO_VECTOR_OP(2, uint16_t, Func)
DO_VECTOR_OP(4, uint32_t, Func)
DO_VECTOR_OP(8, uint64_t, Func)
// Any other element size is reported through LOGMAN_MSG_A_FMT; Tmp is still
// copied out afterwards.
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
// Publish the result: OpSize bytes from the scratch buffer to GDP.
memcpy(GDP, Tmp, OpSize);
}

// Interpreter handler for the VSMax IR op: combines Vector1 and Vector2 with
// std::max over signed integer element types and copies OpSize bytes of the
// result from a scratch buffer to GDP.
// NOTE(review): this listing looks like a diff rendered without +/- markers --
// several statements appear twice in a row (older form first, newer form
// second), e.g. the two `Op` declarations directly below. Confirm which side
// is current before trying to compile this text.
DEF_OP(VSMax) {
auto Op = IROp->C<IR::IROp_VSMax>();
const auto Op = IROp->C<IR::IROp_VSMax>();
// Width of the whole operation; used below as a byte count for memcpy.
const uint8_t OpSize = IROp->Size;

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
// Scratch buffer for the result. The second declaration sizes it by
// XMM_AVX_REG_SIZE rather than a fixed 16 bytes (presumably 32 bytes, i.e. a
// 256-bit register -- TODO confirm against Core::CPUState).
uint8_t Tmp[16];
uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

// Number of elements = operation width / element width. The newer form caches
// the element size in a local first.
const uint8_t Elements = OpSize / Op->Header.ElementSize;
const auto Func = [](auto a, auto b) { return std::max(a, b); };
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;

// Dispatch on element width, using signed types so std::max compares as
// signed. DO_VECTOR_OP is a macro defined outside this view; presumably it
// expands to a `case` that applies Func per element into Tmp -- verify.
switch (Op->Header.ElementSize) {
const auto Func = [](auto a, auto b) { return std::max(a, b); };
switch (ElementSize) {
DO_VECTOR_OP(1, int8_t, Func)
DO_VECTOR_OP(2, int16_t, Func)
DO_VECTOR_OP(4, int32_t, Func)
DO_VECTOR_OP(8, int64_t, Func)
// Any other element size is reported through LOGMAN_MSG_A_FMT; Tmp is still
// copied out afterwards.
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
// Publish the result: OpSize bytes from the scratch buffer to GDP.
memcpy(GDP, Tmp, OpSize);
}
Expand Down
166 changes: 126 additions & 40 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
Expand Up @@ -1953,54 +1953,140 @@ DEF_OP(VSMin) {
}

// Arm64 JIT emitter for the VUMax IR op (unsigned maximum of Vector1 and
// Vector2 into the node's destination register).
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The older single-switch body (the lines that call GetDst(Node)/GetSrc(...)
// inline) is interleaved with the newer body that branches on SVE support, so
// braces and `case` labels do not pair up as written. Confirm which side is
// current before trying to compile this text.
DEF_OP(VUMax) {
auto Op = IROp->C<IR::IROp_VUMax>();
switch (Op->Header.ElementSize) {
case 1: {
umax(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
}
case 2: {
umax(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
}
case 4: {
umax(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
const auto Op = IROp->C<IR::IROp_VUMax>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
// Scalar means the operation covers exactly one element.
const auto IsScalar = ElementSize == OpSize;
// True when the operation width equals XMM_AVX_REG_SIZE (presumably 32
// bytes -- TODO confirm against Core::CPUState).
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

// SVE path: taken only for non-scalar 256-bit operations on hosts that
// report SVE support. Everything else uses the fixed-arrangement path below.
if (HostSupportsSVE && Is256Bit && !IsScalar) {
// Merging form of PRED_TMP_32B (defined outside this view; presumably a
// predicate covering 32 bytes -- verify).
const auto Pred = PRED_TMP_32B.Merging();

// SVE UMAX is a destructive operation, so we need a temporary.
mov(VTMP1.Z().VnD(), Vector1.Z().VnD());

switch (ElementSize) {
case 1: {
umax(VTMP1.Z().VnB(), Pred, VTMP1.Z().VnB(), Vector2.Z().VnB());
break;
}
case 2: {
umax(VTMP1.Z().VnH(), Pred, VTMP1.Z().VnH(), Vector2.Z().VnH());
break;
}
case 4: {
umax(VTMP1.Z().VnS(), Pred, VTMP1.Z().VnS(), Vector2.Z().VnS());
break;
}
case 8: {
umax(VTMP1.Z().VnD(), Pred, VTMP1.Z().VnD(), Vector2.Z().VnD());
break;
}
// Unknown element size: report it and return, so the final move of VTMP1
// into Dst below is not emitted.
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
return;
}
case 8: {
cmhi(VTMP1.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D());
mov(VTMP2.V2D(), GetSrc(Op->Vector1.ID()).V2D());
bit(VTMP2.V16B(), GetSrc(Op->Vector2.ID()).V16B(), VTMP1.V16B());
mov(GetDst(Node).V2D(), VTMP2.V2D());
break;

// Copy the accumulated result out of the temporary.
mov(Dst.Z().VnD(), VTMP1.Z().VnD());
} else {
// Fixed-arrangement path (16B / 8H / 4S / 2D register views).
switch (ElementSize) {
case 1: {
umax(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
break;
}
case 2: {
umax(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
break;
}
case 4: {
umax(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
break;
}
// 64-bit lanes do not use umax here; the maximum is built by compare and
// select instead: VTMP1 = mask of lanes where Vector2 is unsigned-higher
// than Vector1, VTMP2 starts as Vector1, and bit inserts Vector2's bits
// into VTMP2 wherever the mask is set.
case 8: {
cmhi(VTMP1.V2D(), Vector2.V2D(), Vector1.V2D());
mov(VTMP2.V2D(), Vector1.V2D());
bit(VTMP2.V16B(), Vector2.V16B(), VTMP1.V16B());
mov(Dst.V2D(), VTMP2.V2D());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
}

// Arm64 JIT emitter for the VSMax IR op (signed maximum of Vector1 and
// Vector2 into the node's destination register).
// NOTE(review): this listing looks like a diff rendered without +/- markers.
// The older single-switch body (the lines that call GetDst(Node)/GetSrc(...)
// inline) is interleaved with the newer body that branches on SVE support, so
// braces and `case` labels do not pair up as written. Confirm which side is
// current before trying to compile this text.
DEF_OP(VSMax) {
auto Op = IROp->C<IR::IROp_VSMax>();
switch (Op->Header.ElementSize) {
case 1: {
smax(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
}
case 2: {
smax(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
}
case 4: {
smax(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
const auto Op = IROp->C<IR::IROp_VSMax>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
// Scalar means the operation covers exactly one element.
const auto IsScalar = ElementSize == OpSize;
// True when the operation width equals XMM_AVX_REG_SIZE (presumably 32
// bytes -- TODO confirm against Core::CPUState).
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

// SVE path: taken only for non-scalar 256-bit operations on hosts that
// report SVE support. Everything else uses the fixed-arrangement path below.
if (HostSupportsSVE && Is256Bit && !IsScalar) {
// Merging form of PRED_TMP_32B (defined outside this view; presumably a
// predicate covering 32 bytes -- verify).
const auto Pred = PRED_TMP_32B.Merging();

// SVE SMAX is a destructive operation, so we need a temporary.
mov(VTMP1.Z().VnD(), Vector1.Z().VnD());

switch (ElementSize) {
case 1: {
smax(VTMP1.Z().VnB(), Pred, VTMP1.Z().VnB(), Vector2.Z().VnB());
break;
}
case 2: {
smax(VTMP1.Z().VnH(), Pred, VTMP1.Z().VnH(), Vector2.Z().VnH());
break;
}
case 4: {
smax(VTMP1.Z().VnS(), Pred, VTMP1.Z().VnS(), Vector2.Z().VnS());
break;
}
case 8: {
smax(VTMP1.Z().VnD(), Pred, VTMP1.Z().VnD(), Vector2.Z().VnD());
break;
}
// Unknown element size: report it and return, so the final move of VTMP1
// into Dst below is not emitted.
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
return;
}
case 8: {
cmgt(VTMP1.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D());
mov(VTMP2.V2D(), GetSrc(Op->Vector1.ID()).V2D());
bit(VTMP2.V16B(), GetSrc(Op->Vector2.ID()).V16B(), VTMP1.V16B());
mov(GetDst(Node).V2D(), VTMP2.V2D());
break;

// Copy the accumulated result out of the temporary.
mov(Dst.Z().VnD(), VTMP1.Z().VnD());
} else {
// Fixed-arrangement path (16B / 8H / 4S / 2D register views).
switch (ElementSize) {
case 1: {
smax(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
break;
}
case 2: {
smax(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
break;
}
case 4: {
smax(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
break;
}
// 64-bit lanes do not use smax here; the maximum is built by compare and
// select instead: VTMP1 = mask of lanes where Vector2 is signed-greater
// than Vector1, VTMP2 starts as Vector1, and bit inserts Vector2's bits
// into VTMP2 wherever the mask is set.
case 8: {
cmgt(VTMP1.V2D(), Vector2.V2D(), Vector1.V2D());
mov(VTMP2.V2D(), Vector1.V2D());
bit(VTMP2.V16B(), Vector2.V16B(), VTMP1.V16B());
mov(Dst.V2D(), VTMP2.V2D());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
}

Expand Down
42 changes: 30 additions & 12 deletions External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp
Expand Up @@ -1174,40 +1174,58 @@ DEF_OP(VSMin) {
}

// x86_64 JIT emitter for the VUMax IR op: emits a packed unsigned-maximum
// instruction chosen by element size.
// NOTE(review): this listing looks like a diff rendered without +/- markers --
// each emitter call appears twice (older form passing GetDst/GetSrc directly,
// newer form passing the ToYMM-converted locals), as do the `Op` declaration,
// the `switch` head and the `default` label. Confirm which side is current
// before trying to compile this text.
DEF_OP(VUMax) {
auto Op = IROp->C<IR::IROp_VUMax>();
switch (Op->Header.ElementSize) {
const auto Op = IROp->C<IR::IROp_VUMax>();

const auto ElementSize = Op->Header.ElementSize;

// The newer form converts destination and both sources with ToYMM before
// emitting (presumably selecting the 256-bit register view -- verify).
const auto Dst = ToYMM(GetDst(Node));
const auto Vector1 = ToYMM(GetSrc(Op->Vector1.ID()));
const auto Vector2 = ToYMM(GetSrc(Op->Vector2.ID()));

// Only element sizes 1, 2 and 4 have a case here; size 8 falls through to
// the default branch and is reported as unknown.
switch (ElementSize) {
case 1: {
vpmaxub(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpmaxub(Dst, Vector1, Vector2);
break;
}
case 2: {
vpmaxuw(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpmaxuw(Dst, Vector1, Vector2);
break;
}
case 4: {
vpmaxud(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpmaxud(Dst, Vector1, Vector2);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

// x86_64 JIT emitter for the VSMax IR op: emits a packed signed-maximum
// instruction chosen by element size.
// NOTE(review): this listing looks like a diff rendered without +/- markers --
// each emitter call appears twice (older form passing GetDst/GetSrc directly,
// newer form passing the ToYMM-converted locals), as do the `Op` declaration,
// the `switch` head and the `default` label. Confirm which side is current
// before trying to compile this text.
DEF_OP(VSMax) {
auto Op = IROp->C<IR::IROp_VSMax>();
switch (Op->Header.ElementSize) {
const auto Op = IROp->C<IR::IROp_VSMax>();

const auto ElementSize = Op->Header.ElementSize;

// The newer form converts destination and both sources with ToYMM before
// emitting (presumably selecting the 256-bit register view -- verify).
const auto Dst = ToYMM(GetDst(Node));
const auto Vector1 = ToYMM(GetSrc(Op->Vector1.ID()));
const auto Vector2 = ToYMM(GetSrc(Op->Vector2.ID()));

// Only element sizes 1, 2 and 4 have a case here; size 8 falls through to
// the default branch and is reported as unknown.
switch (ElementSize) {
case 1: {
vpmaxsb(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpmaxsb(Dst, Vector1, Vector2);
break;
}
case 2: {
vpmaxsw(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpmaxsw(Dst, Vector1, Vector2);
break;
}
case 4: {
vpmaxsd(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()));
vpmaxsd(Dst, Vector1, Vector2);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

Expand Down

0 comments on commit 8d8b029

Please sign in to comment.