Skip to content

Commit

Permalink
Merge pull request #18 from SupernaviX/scheduling
Browse files Browse the repository at this point in the history
Scheduling improvements
  • Loading branch information
SupernaviX committed May 9, 2024
2 parents 22250e2 + 8551581 commit df849b3
Show file tree
Hide file tree
Showing 11 changed files with 126 additions and 137 deletions.
1 change: 0 additions & 1 deletion llvm/lib/Target/V810/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ add_llvm_target(V810CodeGen
V810AsmPrinter.cpp
V810BranchSelector.cpp
V810FrameLowering.cpp
V810HazardRecognizer.cpp
V810InstrInfo.cpp
V810ISelDAGToDAG.cpp
V810ISelLowering.cpp
Expand Down
64 changes: 0 additions & 64 deletions llvm/lib/Target/V810/V810HazardRecognizer.cpp

This file was deleted.

27 changes: 0 additions & 27 deletions llvm/lib/Target/V810/V810HazardRecognizer.h

This file was deleted.

73 changes: 53 additions & 20 deletions llvm/lib/Target/V810/V810InstrInfo.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include "V810HazardRecognizer.h"
#include "V810InstrInfo.h"
#include "V810.h"
#include "V810Subtarget.h"
Expand Down Expand Up @@ -442,6 +441,59 @@ bool V810InstrInfo::optimizeCompareInstr(MachineInstr &MI, Register SrcReg,
return true;
}

/// Tell the scheduler whether two memory operations should be clustered.
///
/// Each operation is expected to be described by exactly one base operand
/// (asserted). The decision depends only on the instructions that own those
/// base operands: clustering is requested when both of them may load.
/// The offsets, scalability flags, ClusterSize and NumBytes are not consulted.
///
/// \returns true iff the parent instructions of both base operands may load.
bool V810InstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    int64_t Offset1, bool OffsetIsScalable1,
    ArrayRef<const MachineOperand *> BaseOps2,
    int64_t Offset2, bool OffsetIsScalable2,
    unsigned ClusterSize,
    unsigned NumBytes) const {
  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);

  // Only instructions that may load are clustered; bail out as soon as one
  // side is found not to load.
  const MachineInstr *FirstMI = BaseOps1.front()->getParent();
  if (!FirstMI->mayLoad())
    return false;

  const MachineInstr *SecondMI = BaseOps2.front()->getParent();
  return SecondMI->mayLoad();
}

bool V810InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const {
OffsetIsScalable = false;
switch (MI.getOpcode()) {
case V810::IN_B:
case V810::LD_B:
case V810::OUT_B:
case V810::ST_B:
Width = LocationSize::precise(1);
break;
case V810::IN_H:
case V810::LD_H:
case V810::OUT_H:
case V810::ST_H:
Width = LocationSize::precise(2);
break;
case V810::CAXI:
case V810::IN_W:
case V810::LD_W:
case V810::OUT_W:
case V810::ST_W:
Width = LocationSize::precise(4);
break;
default:
return false;
}
unsigned BasePos = MI.mayLoad() ? 1 : 0;
unsigned OffsetPos = BasePos + 1;
if (!MI.getOperand(OffsetPos).isImm()) {
return false;
}
BaseOps.push_back(&MI.getOperand(BasePos));
Offset = MI.getOperand(OffsetPos).getImm();
return true;
}

unsigned V810InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MI.isInlineAsm()) {
const MachineFunction *MF = MI.getParent()->getParent();
Expand All @@ -452,22 +504,3 @@ unsigned V810InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
return get(Opcode).getSize();
}

// Post-RA hazard recognizer factory (itinerary/DAG overload).
// Neither II nor DAG is consulted; a new V810HazardRecognizer is always
// returned.
// NOTE(review): the object is allocated with `new`; presumably the caller
// takes ownership -- confirm against the TargetInstrInfo contract.
ScheduleHazardRecognizer *
V810InstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
return new V810HazardRecognizer();
}

// Post-RA hazard recognizer factory (MachineFunction overload).
// MF is not consulted; a new V810HazardRecognizer is always returned.
// NOTE(review): the object is allocated with `new`; presumably the caller
// takes ownership -- confirm against the TargetInstrInfo contract.
ScheduleHazardRecognizer *
V810InstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
return new V810HazardRecognizer();
}

// Hazard recognizer factory for the MI scheduler.
// When the scheduling DAG tracks virtual-register liveness, the generic
// TargetInstrInfo implementation is used; otherwise a new
// V810HazardRecognizer is returned.
ScheduleHazardRecognizer*
V810InstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                              const ScheduleDAGMI *DAG) const {
  if (DAG->hasVRegLiveness()) {
    return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
  }
  return new V810HazardRecognizer();
}
19 changes: 11 additions & 8 deletions llvm/lib/Target/V810/V810InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ class V810InstrInfo : public V810GenInstrInfo {
Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;

// Scheduler hook: reports whether the two memory operations described by
// BaseOps1/BaseOps2 should be clustered together.
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
int64_t Offset1, bool OffsetIsScalable1,
ArrayRef<const MachineOperand *> BaseOps2,
int64_t Offset2, bool OffsetIsScalable2,
unsigned ClusterSize,
unsigned NumBytes) const override;
// Scheduler hook: on success, fills in the base operand(s), immediate offset
// and access width of the memory instruction MI and returns true; returns
// false when MI cannot be described that way.
bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
const TargetRegisterInfo *TRI) const override;

bool isUnpredicatedTerminatorBesidesNop(const MachineInstr &MI) const;

public:
Expand All @@ -102,14 +113,6 @@ class V810InstrInfo : public V810GenInstrInfo {

virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

virtual ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
virtual ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;
virtual ScheduleHazardRecognizer *
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAGMI *DAG) const override;
};

}
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Target/V810/V810InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def CC_NOP : CC_VAL<13>; // Never (nop)
def CC_GE : CC_VAL<14>; // Greater than or equal (signed)
def CC_GT : CC_VAL<15>; // Greater than

let mayLoad = 1 in {
let mayLoad = 1, Itinerary = IIC_Load in {
def LD_W : FormatVI<0b110011,
(outs GenRegs:$r2), (ins (MEMri $r1, $disp):$addr),
"ld.w $addr, $r2",
Expand Down Expand Up @@ -192,7 +192,7 @@ let mayLoad = 1 in {
>;
}

let mayStore = 1 in {
let mayStore = 1, Itinerary = IIC_Store in {
def ST_W : FormatVI<0b110111,
(outs), (ins (MEMri $r1, $disp):$addr, GenRegs:$r2),
"st.w $r2, $addr",
Expand Down Expand Up @@ -685,6 +685,7 @@ class BitstringInst<bits<5> subop, string asmstr>
let imm = subop;
let Uses = [R26, R27, R28, R29, R30];
let Defs = [R26, R27, R28, R29, R30];
let Itinerary = IIC_Store;
let mayLoad = 1;
let mayStore = 1;
}
Expand All @@ -706,6 +707,7 @@ class BitstringSearchInst<bits<5> subop, string asmstr>
let Uses = [R27, R28, R29, R30];
let Defs = [R27, R28, R29, R30, SR5];
let mayLoad = 1;
let Itinerary = IIC_Load;
}

def SCH0BSD : BitstringSearchInst<0b00001, "sch0bsd">;
Expand All @@ -717,7 +719,8 @@ def SCH1BSU : BitstringSearchInst<0b00010, "sch1bsu">;
// $rcmp is always the same as r2, and rswap is always R30.
// The latter is guaranteed by using custom lowering for ATOMIC_CMP_SWAP.
let mayLoad = 1, mayStore = 1, Uses = [R30], Defs = [SR5],
Constraints = "$r2 = $rcmp", DisableEncoding = "$rcmp,$rswap" in
Constraints = "$r2 = $rcmp", DisableEncoding = "$rcmp,$rswap",
Itinerary = IIC_Store in
def CAXI : FormatVI<0b111010,
(outs GenRegs:$r2), (ins (MEMri $r1, $disp):$addr, GenRegs:$rcmp, GenRegs:$rswap),
"caxi $addr, $r2",
Expand Down
31 changes: 23 additions & 8 deletions llvm/lib/Target/V810/V810Schedule.td
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@

// The "Itineraries" below are only used by the ScoreboardHazardRecognizer
// The single functional unit referenced by the itinerary stages below.
def MemoryUnit : FuncUnit;

// Itinerary classes attached to instructions via `Itinerary = ...`.
def IIC_Load : InstrItinClass;
def IIC_Store : InstrItinClass;

// Itinerary table: one functional unit (MemoryUnit), an empty second list,
// and one stage per itinerary class.
// NOTE(review): the first InstrStage argument is presumably the number of
// cycles the unit stays reserved (0 for loads, 2 for stores) -- confirm
// against llvm/Target/TargetItinerary.td.
def V810Itineraries : ProcessorItineraries<
[MemoryUnit],
[],
[
InstrItinData<IIC_Load, [InstrStage<0, [MemoryUnit]>]>,
InstrItinData<IIC_Store, [InstrStage<2, [MemoryUnit]>]>,
]>;

// The Schedule Machine Model itself

// Machine model for the V810: in-order, single-issue, with the itinerary
// table V810Itineraries attached.
def V810Model : SchedMachineModel {
let MicroOpBufferSize = 0; // In-order
let IssueWidth = 1; // One thing happens per cycle
// NOTE(review): presumably turns on the post-register-allocation scheduler
// for this model -- confirm against llvm/Target/TargetSchedule.td.
let PostRAScheduler = 1;

// Expose the itineraries defined in this file through the model.
let Itineraries = V810Itineraries;
}

// Resources used by scheduler definitions
Expand All @@ -30,14 +45,14 @@ def WriteBranch : SchedWrite; // jump jump
// Tie it all together
let SchedModel = V810Model in {

def : WriteRes<WriteI, []> { let Latency = 1; }
def : WriteRes<WriteALU, []> { let Latency = 1; }
def : WriteRes<WriteIMul, []> { let Latency = 13; }
def : WriteRes<WriteIDiv, []> { let Latency = 38; }
def : WriteRes<WriteFPU, [V810UnitFPU]> { let Latency = 28; } // everything is the same number of cycles because I am lazy
def : WriteRes<WriteLoad, [V810UnitMemory]> { let Latency = 1; } // TODO: translate timings to scheduler model
def : WriteRes<WriteStore, [V810UnitMemory]> { let Latency = 1; } // TODO: translate timings to scheduler model
def : WriteRes<WriteBranch, [V810UnitBranch]> { let Latency = 3; } // TODO: how do I say "this flushes the pipeline"
def : WriteRes<WriteI, []>;
def : WriteRes<WriteALU, []>;
def : WriteRes<WriteIMul, []>;
def : WriteRes<WriteIDiv, []>;
def : WriteRes<WriteFPU, [V810UnitFPU]>;
def : WriteRes<WriteLoad, [V810UnitMemory]>;
def : WriteRes<WriteStore, [V810UnitMemory]> { let ReleaseAtCycles = [2]; }
def : WriteRes<WriteBranch, [V810UnitBranch]>; // TODO: how do I say "this flushes the pipeline"

// the COPY pseudo-instr needs scheduling info
def : InstRW<[WriteI], (instrs COPY)>;
Expand Down
13 changes: 9 additions & 4 deletions llvm/lib/Target/V810/V810Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "V810GenSubtargetInfo.inc"

/// Resolve the CPU name to use for subtarget setup.
///
/// \returns "vb" when no CPU was requested and the triple's OS/environment
///          component is "vb"; otherwise CPU unchanged (possibly empty).
static StringRef getCPUName(const Triple &TT, StringRef CPU) {
  const bool UseVBDefault =
      CPU.empty() && TT.getOSAndEnvironmentName() == "vb";
  return UseVBDefault ? StringRef("vb") : CPU;
}

V810Subtarget &V810Subtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef CPU,
StringRef FS) {
IsNintendo = false;
EnableGPRelativeRAM = false;
EnableAppRegisters = false;

std::string CPUName = std::string(CPU);
if (CPUName.empty() && TT.getOSAndEnvironmentName() == "vb")
CPUName = "vb";
StringRef CPUName = getCPUName(TT, CPU);
ParseSubtargetFeatures(CPUName, /*TuneCPU*/ CPUName, FS);

return *this;
Expand All @@ -27,4 +31,5 @@ V810Subtarget &V810Subtarget::initializeSubtargetDependencies(const Triple &TT,
V810Subtarget::V810Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: V810GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(TT, CPU, FS)), FrameLowering() {}
InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(TT, CPU, FS)), FrameLowering(),
InstrItins(getInstrItineraryForCPU(getCPUName(TT, CPU))) {}
4 changes: 4 additions & 0 deletions llvm/lib/Target/V810/V810Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@ class V810Subtarget : public V810GenSubtargetInfo {
V810TargetLowering TLInfo;
V810SelectionDAGInfo TSInfo;
V810FrameLowering FrameLowering;
InstrItineraryData InstrItins;
public:
V810Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);

// Returns a pointer to the itinerary data held in this subtarget's
// InstrItins member.
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
const V810InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const V810FrameLowering *getFrameLowering() const override {
return &FrameLowering;
Expand Down
Loading

0 comments on commit df849b3

Please sign in to comment.