Skip to content

Commit

Permalink
Merge pull request #1983 from lioncash/vsqadd
Browse files Browse the repository at this point in the history
VectorOps: Extend VSQAdd/VSQSub/VUQAdd/VUQSub
  • Loading branch information
Sonicadvance1 committed Sep 13, 2022
2 parents 0ad52b7 + 809f60d commit f34f130
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 83 deletions.
78 changes: 51 additions & 27 deletions External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
Expand Up @@ -11,6 +11,8 @@ tags: backend|interpreter

#include <bit>
#include <cstdint>
#include <limits>
#include <type_traits>

namespace FEXCore::CPU {
#define DEF_OP(x) void InterpreterOps::Op_##x(IR::IROp_Header *IROp, IROpData *Data, IR::NodeID Node)
Expand Down Expand Up @@ -173,20 +175,23 @@ DEF_OP(VUQAdd) {

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
uint8_t Tmp[16];
uint8_t Tmp[32];

const uint8_t Elements = OpSize / Op->Header.ElementSize;
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;

// Per-lane unsigned saturating add.
//
// a, b: the two lane values (the element type is chosen by the DO_VECTOR_OP
//       case that invokes this lambda).
// Returns a + b, or the maximum value of the lane type if the sum wrapped.
const auto Func = [](auto a, auto b) {
  using Type = decltype(a);

  // Unsigned arithmetic wraps on overflow, so a sum smaller than an operand
  // means the true result did not fit in the lane.
  const Type res = a + b;

  // Saturate to the lane type's own maximum. A literal such as ~0U is only
  // 32 bits wide and would zero-extend to 0x00000000FFFFFFFF for 64-bit lanes.
  return res < a ? std::numeric_limits<Type>::max() : res;
};
switch (Op->Header.ElementSize) {
switch (ElementSize) {
DO_VECTOR_OP(1, uint8_t, Func)
DO_VECTOR_OP(2, uint16_t, Func)
DO_VECTOR_OP(4, uint32_t, Func)
DO_VECTOR_OP(8, uint64_t, Func)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp, OpSize);
}
Expand All @@ -197,20 +202,23 @@ DEF_OP(VUQSub) {

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
uint8_t Tmp[16];
uint8_t Tmp[32];

const uint8_t Elements = OpSize / Op->Header.ElementSize;
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;

// Per-lane unsigned saturating subtract: yields a - b, clamped to zero when
// the subtraction would go below zero.
const auto Func = [](auto a, auto b) {
  using Type = decltype(a);

  // Unsigned subtraction wraps around on borrow; a wrapped difference is
  // larger than the minuend, which is how the underflow is detected.
  const Type Wrapped = a - b;

  return Wrapped <= a ? Wrapped : 0U;
};
switch (Op->Header.ElementSize) {
switch (ElementSize) {
DO_VECTOR_OP(1, uint8_t, Func)
DO_VECTOR_OP(2, uint16_t, Func)
DO_VECTOR_OP(4, uint32_t, Func)
DO_VECTOR_OP(8, uint64_t, Func)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp, OpSize);
}
Expand All @@ -221,30 +229,37 @@ DEF_OP(VSQAdd) {

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
uint8_t Tmp[16];
uint8_t Tmp[32];

const uint8_t Elements = OpSize / Op->Header.ElementSize;
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;

// Per-lane signed saturating add.
//
// a, b: the two lane values; both must have the same signed integer type.
// Returns a + b clamped to [Limits::min(), Limits::max()] of that type.
const auto Func = [](auto a, auto b) {
  static_assert(std::is_same_v<decltype(a), decltype(b)>);
  using Type = decltype(a);
  using Limits = std::numeric_limits<Type>;

  if (a > 0) {
    // With a positive, only overflow past max is possible, and
    // Limits::max() - a cannot itself overflow.
    if (b > (Limits::max() - a)) {
      return Limits::max();
    }
  }
  else if (b < (Limits::min() - a)) {
    // With a <= 0, only underflow below min is possible, and
    // Limits::min() - a cannot itself overflow.
    return Limits::min();
  }

  // The sum is now known to be representable. It is deliberately computed
  // only here: evaluating a + b before the range checks would be signed
  // overflow (undefined behavior) for 32-bit and 64-bit lanes.
  return static_cast<Type>(a + b);
};
switch (Op->Header.ElementSize) {
switch (ElementSize) {
DO_VECTOR_OP(1, int8_t, Func)
DO_VECTOR_OP(2, int16_t, Func)
DO_VECTOR_OP(4, int32_t, Func)
DO_VECTOR_OP(8, int64_t, Func)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp, OpSize);
}
Expand All @@ -255,26 +270,35 @@ DEF_OP(VSQSub) {

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);
uint8_t Tmp[16];
uint8_t Tmp[32];

const uint8_t Elements = OpSize / Op->Header.ElementSize;
const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;

// Per-lane signed saturating subtract.
//
// a, b: the two lane values; both must have the same signed integer type.
// Returns a - b clamped to [Limits::min(), Limits::max()] of that type.
const auto Func = [](auto a, auto b) {
  static_assert(std::is_same_v<decltype(a), decltype(b)>);
  using Type = decltype(a);
  using Limits = std::numeric_limits<Type>;

  // Widen *before* subtracting. `a - b` on its own is evaluated in the
  // (promoted) operand type, so for 32-bit and 64-bit lanes it would overflow
  // before ever reaching the 128-bit variable and the clamps below could
  // never fire.
  const __int128_t res = static_cast<__int128_t>(a) - b;
  if (res < Limits::min()) {
    return Limits::min();
  }
  if (res > Limits::max()) {
    return Limits::max();
  }

  // In range, so the narrowing conversion is value-preserving.
  return static_cast<Type>(res);
};

switch (Op->Header.ElementSize) {
switch (ElementSize) {
DO_VECTOR_OP(1, int8_t, Func)
DO_VECTOR_OP(2, int16_t, Func)
DO_VECTOR_OP(4, int32_t, Func)
DO_VECTOR_OP(8, int64_t, Func)
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
memcpy(GDP, Tmp, OpSize);
}
Expand Down
180 changes: 140 additions & 40 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
Expand Up @@ -354,93 +354,193 @@ DEF_OP(VSub) {

// Arm64 JIT handler for IROp_VUQAdd: emits one `uqadd` (AArch64 unsigned
// saturating add) whose lane arrangement is chosen from Header.ElementSize
// (1, 2, 4 or 8 bytes). Any other element size is reported through
// LOGMAN_MSG_A_FMT.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped.
// The `switch (Op->Header.ElementSize)` line, the one-line
// `uqadd(GetDst(Node)...)` calls with the `break;` directly after them, and the
// one-line `default:` appear to be the removed (pre-change) lines shown next
// to their replacements — confirm against the repository before compiling.
DEF_OP(VUQAdd) {
auto Op = IROp->C<IR::IROp_VUQAdd>();
switch (Op->Header.ElementSize) {

const auto ElementSize = Op->Header.ElementSize;

// Resolve the destination and both source registers once, up front.
const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

switch (ElementSize) {
case 1: {
uqadd(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
// With HostSupportsSVE set, the Z-register (SVE) form is emitted; otherwise
// the fixed NEON arrangement is used. The same split repeats for each size.
if (HostSupportsSVE) {
uqadd(Dst.Z().VnB(), Vector1.Z().VnB(), Vector2.Z().VnB());
} else {
uqadd(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
}
break;
}
case 2: {
uqadd(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
if (HostSupportsSVE) {
uqadd(Dst.Z().VnH(), Vector1.Z().VnH(), Vector2.Z().VnH());
} else {
uqadd(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
}
break;
}
case 4: {
uqadd(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
if (HostSupportsSVE) {
uqadd(Dst.Z().VnS(), Vector1.Z().VnS(), Vector2.Z().VnS());
} else {
uqadd(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
}
break;
}
case 8: {
uqadd(GetDst(Node).V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D());
break;
if (HostSupportsSVE) {
uqadd(Dst.Z().VnD(), Vector1.Z().VnD(), Vector2.Z().VnD());
} else {
uqadd(Dst.V2D(), Vector1.V2D(), Vector2.V2D());
}
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

// Arm64 JIT handler for IROp_VUQSub: emits one `uqsub` (AArch64 unsigned
// saturating subtract) whose lane arrangement is chosen from
// Header.ElementSize (1, 2, 4 or 8 bytes). Any other element size is reported
// through LOGMAN_MSG_A_FMT.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped.
// The `switch (Op->Header.ElementSize)` line, the one-line
// `uqsub(GetDst(Node)...)` calls with the `break;` directly after them, and the
// one-line `default:` appear to be the removed (pre-change) lines shown next
// to their replacements — confirm against the repository before compiling.
DEF_OP(VUQSub) {
auto Op = IROp->C<IR::IROp_VUQSub>();
switch (Op->Header.ElementSize) {

const auto ElementSize = Op->Header.ElementSize;

// Resolve the destination and both source registers once, up front.
const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

switch (ElementSize) {
case 1: {
uqsub(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
// With HostSupportsSVE set, the Z-register (SVE) form is emitted; otherwise
// the fixed NEON arrangement is used. The same split repeats for each size.
if (HostSupportsSVE) {
uqsub(Dst.Z().VnB(), Vector1.Z().VnB(), Vector2.Z().VnB());
} else {
uqsub(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
}
break;
}
case 2: {
uqsub(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
if (HostSupportsSVE) {
uqsub(Dst.Z().VnH(), Vector1.Z().VnH(), Vector2.Z().VnH());
} else {
uqsub(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
}
break;
}
case 4: {
uqsub(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
if (HostSupportsSVE) {
uqsub(Dst.Z().VnS(), Vector1.Z().VnS(), Vector2.Z().VnS());
} else {
uqsub(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
}
break;
}
case 8: {
uqsub(GetDst(Node).V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D());
break;
if (HostSupportsSVE) {
uqsub(Dst.Z().VnD(), Vector1.Z().VnD(), Vector2.Z().VnD());
} else {
uqsub(Dst.V2D(), Vector1.V2D(), Vector2.V2D());
}
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

// Arm64 JIT handler for IROp_VSQAdd: emits one `sqadd` (AArch64 signed
// saturating add) whose lane arrangement is chosen from Header.ElementSize
// (1, 2, 4 or 8 bytes). Any other element size is reported through
// LOGMAN_MSG_A_FMT.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped.
// The `switch (Op->Header.ElementSize)` line, the one-line
// `sqadd(GetDst(Node)...)` calls with the `break;` directly after them, and the
// one-line `default:` appear to be the removed (pre-change) lines shown next
// to their replacements — confirm against the repository before compiling.
DEF_OP(VSQAdd) {
auto Op = IROp->C<IR::IROp_VSQAdd>();
switch (Op->Header.ElementSize) {

const auto ElementSize = Op->Header.ElementSize;

// Resolve the destination and both source registers once, up front.
const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

switch (ElementSize) {
case 1: {
sqadd(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
// With HostSupportsSVE set, the Z-register (SVE) form is emitted; otherwise
// the fixed NEON arrangement is used. The same split repeats for each size.
if (HostSupportsSVE) {
sqadd(Dst.Z().VnB(), Vector1.Z().VnB(), Vector2.Z().VnB());
} else {
sqadd(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
}
break;
}
case 2: {
sqadd(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
if (HostSupportsSVE) {
sqadd(Dst.Z().VnH(), Vector1.Z().VnH(), Vector2.Z().VnH());
} else {
sqadd(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
}
break;
}
case 4: {
sqadd(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
if (HostSupportsSVE) {
sqadd(Dst.Z().VnS(), Vector1.Z().VnS(), Vector2.Z().VnS());
} else {
sqadd(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
}
break;
}
case 8: {
sqadd(GetDst(Node).V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D());
break;
if (HostSupportsSVE) {
sqadd(Dst.Z().VnD(), Vector1.Z().VnD(), Vector2.Z().VnD());
} else {
sqadd(Dst.V2D(), Vector1.V2D(), Vector2.V2D());
}
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

// Arm64 JIT handler for IROp_VSQSub: emits one `sqsub` (AArch64 signed
// saturating subtract) whose lane arrangement is chosen from
// Header.ElementSize (1, 2, 4 or 8 bytes). Any other element size is reported
// through LOGMAN_MSG_A_FMT.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped.
// The `switch (Op->Header.ElementSize)` line, the one-line
// `sqsub(GetDst(Node)...)` calls with the `break;` directly after them, and the
// one-line `default:` appear to be the removed (pre-change) lines shown next
// to their replacements — confirm against the repository before compiling.
DEF_OP(VSQSub) {
auto Op = IROp->C<IR::IROp_VSQSub>();
switch (Op->Header.ElementSize) {

const auto ElementSize = Op->Header.ElementSize;

// Resolve the destination and both source registers once, up front.
const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

switch (ElementSize) {
case 1: {
sqsub(GetDst(Node).V16B(), GetSrc(Op->Vector1.ID()).V16B(), GetSrc(Op->Vector2.ID()).V16B());
break;
// With HostSupportsSVE set, the Z-register (SVE) form is emitted; otherwise
// the fixed NEON arrangement is used. The same split repeats for each size.
if (HostSupportsSVE) {
sqsub(Dst.Z().VnB(), Vector1.Z().VnB(), Vector2.Z().VnB());
} else {
sqsub(Dst.V16B(), Vector1.V16B(), Vector2.V16B());
}
break;
}
case 2: {
sqsub(GetDst(Node).V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
break;
if (HostSupportsSVE) {
sqsub(Dst.Z().VnH(), Vector1.Z().VnH(), Vector2.Z().VnH());
} else {
sqsub(Dst.V8H(), Vector1.V8H(), Vector2.V8H());
}
break;
}
case 4: {
sqsub(GetDst(Node).V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
break;
if (HostSupportsSVE) {
sqsub(Dst.Z().VnS(), Vector1.Z().VnS(), Vector2.Z().VnS());
} else {
sqsub(Dst.V4S(), Vector1.V4S(), Vector2.V4S());
}
break;
}
case 8: {
sqsub(GetDst(Node).V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D());
break;
if (HostSupportsSVE) {
sqsub(Dst.Z().VnD(), Vector1.Z().VnD(), Vector2.Z().VnD());
} else {
sqsub(Dst.V2D(), Vector1.V2D(), Vector2.V2D());
}
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
}

Expand Down

0 comments on commit f34f130

Please sign in to comment.