Skip to content

Commit

Permalink
Merge pull request #2002 from lioncash/slots
Browse files Browse the repository at this point in the history
JITs: Expand max spill slot size to 32 bytes
  • Loading branch information
Sonicadvance1 committed Sep 19, 2022
2 parents 169cfbb + 341bdb5 commit 0d0d116
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 45 deletions.
19 changes: 12 additions & 7 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp
Expand Up @@ -783,10 +783,12 @@ void *Arm64JITCore::CompileCode(uint64_t Entry,
SpillSlots = RAData->SpillSlots();

if (SpillSlots) {
if (IsImmAddSub(SpillSlots * 16)) {
sub(sp, sp, SpillSlots * 16);
const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;

if (IsImmAddSub(TotalSpillSlotsSize)) {
sub(sp, sp, TotalSpillSlotsSize);
} else {
LoadConstant(x0, SpillSlots * 16);
LoadConstant(x0, TotalSpillSlotsSize);
sub(sp, sp, x0);
}
}
Expand Down Expand Up @@ -854,14 +856,17 @@ void *Arm64JITCore::CompileCode(uint64_t Entry,
}

void Arm64JITCore::ResetStack() {
if (SpillSlots == 0)
if (SpillSlots == 0) {
return;
}

const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;

if (IsImmAddSub(SpillSlots * 16)) {
add(sp, sp, SpillSlots * 16);
if (IsImmAddSub(TotalSpillSlotsSize)) {
add(sp, sp, TotalSpillSlotsSize);
} else {
// Too big to fit in a 12bit immediate
LoadConstant(x0, SpillSlots * 16);
LoadConstant(x0, TotalSpillSlotsSize);
add(sp, sp, x0);
}
}
Expand Down
56 changes: 41 additions & 15 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp
Expand Up @@ -446,9 +446,9 @@ DEF_OP(StoreContextIndexed) {
}

DEF_OP(SpillRegister) {
auto Op = IROp->C<IR::IROp_SpillRegister>();
const auto Op = IROp->C<IR::IROp_SpillRegister>();
const uint8_t OpSize = IROp->Size;
const uint32_t SlotOffset = Op->Slot * 16;
const uint32_t SlotOffset = Op->Slot * MaxSpillSlotSize;

if (Op->Class == FEXCore::IR::GPRClass) {
switch (OpSize) {
Expand All @@ -468,33 +468,46 @@ DEF_OP(SpillRegister) {
str(GetReg<RA_64>(Op->Value.ID()), MemOperand(sp, SlotOffset));
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
default:
LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
break;
}
} else if (Op->Class == FEXCore::IR::FPRClass) {
const auto Src = GetSrc(Op->Value.ID());

switch (OpSize) {
case 4: {
str(GetSrc(Op->Value.ID()).S(), MemOperand(sp, SlotOffset));
str(Src.S(), MemOperand(sp, SlotOffset));
break;
}
case 8: {
str(GetSrc(Op->Value.ID()).D(), MemOperand(sp, SlotOffset));
str(Src.D(), MemOperand(sp, SlotOffset));
break;
}
case 16: {
str(GetSrc(Op->Value.ID()), MemOperand(sp, SlotOffset));
str(Src, MemOperand(sp, SlotOffset));
break;
}
case 32: {
// TODO: Eliminate ptrue with statically allocated predicate register.
ptrue(p7.VnB(), SVE_VL32);
mov(TMP3, SlotOffset);
st1b(Src.Z().VnB(), p7, SVEMemOperand(sp, TMP3));
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
default:
LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
break;
}
} else {
LOGMAN_MSG_A_FMT("Unhandled SpillRegister class: {}", Op->Class.Val);
}
}

DEF_OP(FillRegister) {
auto Op = IROp->C<IR::IROp_FillRegister>();
uint8_t OpSize = IROp->Size;
uint32_t SlotOffset = Op->Slot * 16;
const auto Op = IROp->C<IR::IROp_FillRegister>();
const uint8_t OpSize = IROp->Size;
const uint32_t SlotOffset = Op->Slot * MaxSpillSlotSize;

if (Op->Class == FEXCore::IR::GPRClass) {
switch (OpSize) {
Expand All @@ -514,23 +527,36 @@ DEF_OP(FillRegister) {
ldr(GetReg<RA_64>(Node), MemOperand(sp, SlotOffset));
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
default:
LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
break;
}
} else if (Op->Class == FEXCore::IR::FPRClass) {
const auto Dst = GetDst(Node);

switch (OpSize) {
case 4: {
ldr(GetDst(Node).S(), MemOperand(sp, SlotOffset));
ldr(Dst.S(), MemOperand(sp, SlotOffset));
break;
}
case 8: {
ldr(GetDst(Node).D(), MemOperand(sp, SlotOffset));
ldr(Dst.D(), MemOperand(sp, SlotOffset));
break;
}
case 16: {
ldr(GetDst(Node), MemOperand(sp, SlotOffset));
ldr(Dst, MemOperand(sp, SlotOffset));
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
case 32: {
// TODO: Eliminate ptrue with statically allocated predicate register.
ptrue(p7.VnB(), SVE_VL32);
mov(TMP3, SlotOffset);
ld1b(Dst.Z().VnB(), p7.Zeroing(), SVEMemOperand(sp, TMP3));
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
break;
}
} else {
LOGMAN_MSG_A_FMT("Unhandled FillRegister class: {}", Op->Class.Val);
Expand Down
Expand Up @@ -33,7 +33,7 @@ namespace FEXCore::CPU {
DEF_OP(SignalReturn) {
// Adjust the stack first for a regular return
if (SpillSlots) {
add(rsp, SpillSlots * 16); // + 8 to consume return address
add(rsp, SpillSlots * MaxSpillSlotSize); // + 8 to consume return address
}

jmp(qword [STATE + offsetof(FEXCore::Core::CpuStateFrame, Pointers.Common.SignalReturnHandler)]);
Expand All @@ -42,7 +42,7 @@ DEF_OP(SignalReturn) {
DEF_OP(CallbackReturn) {
// Adjust the stack first for a regular return
if (SpillSlots) {
add(rsp, SpillSlots * 16); // + 8 to consume return address
add(rsp, SpillSlots * MaxSpillSlotSize); // + 8 to consume return address
}

// Make sure to adjust the refcounter so we don't clear the cache now
Expand Down Expand Up @@ -71,7 +71,7 @@ DEF_OP(ExitFunction) {


if (SpillSlots) {
add(rsp, SpillSlots * 16);
add(rsp, SpillSlots * MaxSpillSlotSize);
}

uint64_t NewRIP;
Expand Down
2 changes: 1 addition & 1 deletion External/FEXCore/Source/Interface/Core/JIT/x86_64/JIT.cpp
Expand Up @@ -599,7 +599,7 @@ void *X86JITCore::CompileCode(uint64_t Entry, [[maybe_unused]] FEXCore::IR::IRLi
SpillSlots = RAData->SpillSlots();

if (SpillSlots) {
sub(rsp, SpillSlots * 16);
sub(rsp, SpillSlots * MaxSpillSlotSize);
}

#ifdef BLOCKSTATS
Expand Down
54 changes: 36 additions & 18 deletions External/FEXCore/Source/Interface/Core/JIT/x86_64/MemoryOps.cpp
Expand Up @@ -333,10 +333,10 @@ DEF_OP(StoreContextIndexed) {
}

DEF_OP(SpillRegister) {
auto Op = IROp->C<IR::IROp_SpillRegister>();
uint8_t OpSize = IROp->Size;
const auto Op = IROp->C<IR::IROp_SpillRegister>();
const uint8_t OpSize = IROp->Size;
const uint32_t SlotOffset = Op->Slot * MaxSpillSlotSize;

uint32_t SlotOffset = Op->Slot * 16;
if (Op->Class == FEXCore::IR::GPRClass) {
switch (OpSize) {
case 1: {
Expand All @@ -355,36 +355,44 @@ DEF_OP(SpillRegister) {
mov(qword [rsp + SlotOffset], GetSrc<RA_64>(Op->Value.ID()));
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
default:
LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
break;
}
} else if (Op->Class == FEXCore::IR::FPRClass) {
const auto Src = GetSrc(Op->Value.ID());

switch (OpSize) {
case 4: {
movss(dword [rsp + SlotOffset], GetSrc(Op->Value.ID()));
movss(dword [rsp + SlotOffset], Src);
break;
}
case 8: {
movsd(qword [rsp + SlotOffset], GetSrc(Op->Value.ID()));
movsd(qword [rsp + SlotOffset], Src);
break;
}
case 16: {
movaps(xword [rsp + SlotOffset], GetSrc(Op->Value.ID()));
movaps(xword [rsp + SlotOffset], Src);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
case 32: {
vmovaps(yword [rsp + SlotOffset], ToYMM(Src));
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", OpSize);
break;
}
} else {
LOGMAN_MSG_A_FMT("Unhandled SpillRegister class: {}", Op->Class.Val);
}


}

DEF_OP(FillRegister) {
auto Op = IROp->C<IR::IROp_FillRegister>();
uint8_t OpSize = IROp->Size;
const auto Op = IROp->C<IR::IROp_FillRegister>();
const uint8_t OpSize = IROp->Size;
const uint32_t SlotOffset = Op->Slot * MaxSpillSlotSize;

uint32_t SlotOffset = Op->Slot * 16;
if (Op->Class == FEXCore::IR::GPRClass) {
switch (OpSize) {
case 1: {
Expand All @@ -403,23 +411,33 @@ DEF_OP(FillRegister) {
mov(GetDst<RA_64>(Node), qword [rsp + SlotOffset]);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
default:
LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
break;
}
} else if (Op->Class == FEXCore::IR::FPRClass) {
const auto Dst = GetDst(Node);

switch (OpSize) {
case 4: {
movss(GetDst(Node), dword [rsp + SlotOffset]);
movss(Dst, dword [rsp + SlotOffset]);
break;
}
case 8: {
movsd(GetDst(Node), qword [rsp + SlotOffset]);
movsd(Dst, qword [rsp + SlotOffset]);
break;
}
case 16: {
movaps(GetDst(Node), xword [rsp + SlotOffset]);
movaps(Dst, xword [rsp + SlotOffset]);
break;
}
default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
case 32: {
vmovaps(ToYMM(Dst), yword [rsp + SlotOffset]);
break;
}
default:
LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", OpSize);
break;
}
} else {
LOGMAN_MSG_A_FMT("Unhandled FillRegister class: {}", Op->Class.Val);
Expand Down
Expand Up @@ -47,7 +47,7 @@ DEF_OP(Break) {
auto Op = IROp->C<IR::IROp_Break>();

if (SpillSlots) {
add(rsp, SpillSlots * 16);
add(rsp, SpillSlots * MaxSpillSlotSize);
}

mov(byte [STATE + offsetof(FEXCore::Core::CpuStateFrame, SynchronousFaultData.FaultToTopAndGeneratedException)], 1);
Expand Down
4 changes: 4 additions & 0 deletions External/FEXCore/include/FEXCore/Core/CPUBackend.h
Expand Up @@ -122,6 +122,10 @@ namespace CPU {
bool IsAddressInCodeBuffer(uintptr_t Address) const;

protected:
// Max spill slot size in bytes. The widest store to a slot is a
// 256-bit vector, so a slot needs at most 32 bytes.
constexpr static uint32_t MaxSpillSlotSize = 32;

FEXCore::Core::InternalThreadState *ThreadState;

size_t InitialCodeSize, MaxCodeSize;
Expand Down

0 comments on commit 0d0d116

Please sign in to comment.