diff --git a/example/optsched-cfg/sched.ini b/example/optsched-cfg/sched.ini index f74a6feb..ed7e87cb 100644 --- a/example/optsched-cfg/sched.ini +++ b/example/optsched-cfg/sched.ini @@ -95,6 +95,10 @@ SECOND_PASS_LENGTH_TIMEOUT 5 # BLOCK : use the time limits in the above fields as is TIMEOUT_PER INSTR +# The maximum number of instructions to use the scheduler for. +# Beyond this size, the heuristic scheduler is used. +MAX_REGION_LENGTH 2147483647 + # The heuristic used for the list scheduler. Valid values are any combination of: # CP: critical path # LUC: last use count @@ -243,9 +247,29 @@ REGIONS_TO_SCHEDULE fft1D_512:114 # history domination is disabled. ENABLE_SUFFIX_CONCATENATION NO +# Where to perform graph transformations. Valid values are any combination of: +# BH - before heuristic; run on all blocks that we schedule +# AH - after heuristic; only if the heuristic scheduler doesn't prove optimality +GT_POSITION AH + +# Where to perform graph transformations for the second pass. +# Valid values are the same as with GT_POSITION. +# However, note that the sequential list scheduler is practically never +# going to give an optimal schedule, so BH is almost certainly superior. +2ND_PASS_GT_POSITION BH + # Whether to apply the node superiority graph transformation. STATIC_NODE_SUPERIORITY NO +# Whether to apply the ILP only node superiority graph transformation. +STATIC_NODE_SUPERIORITY_ILP NO + +# Whether to apply the combined node superiority graph transformation. +STATIC_NODE_SUPERIORITY_ILP_PRESERVE_OCCUPANCY NO + +# Whether the second pass of the two pass algorithm should use the combined node superiority graph transformation. +2ND_PASS_ILP_NODE_SUPERIORITY_PRESERVING_OCCUPANCY NO + # Whether to apply node superiority in multiple passes. MULTI_PASS_NODE_SUPERIORITY NO diff --git a/include/opt-sched/Scheduler/bb_spill.h b/include/opt-sched/Scheduler/bb_spill.h index 11aa31f9..e6d81bf4 100644 --- a/include/opt-sched/Scheduler/bb_spill.h +++ b/include/opt-sched/Scheduler/bb_spill.h @@ -118,7 +118,7 @@ class BBWithSpill : public SchedRegion { SchedPriorities hurstcPrirts, SchedPriorities enumPrirts, bool vrfySched, Pruning PruningStrategy, bool SchedForRPOnly, bool enblStallEnum, int SCW, SPILL_COST_FUNCTION spillCostFunc, - SchedulerType HeurSchedType); + SchedulerType HeurSchedType, GT_POSITION GraphTransPosition); ~BBWithSpill(); InstCount CmputExecCostLwrBound(); diff --git a/include/opt-sched/Scheduler/graph_trans.h b/include/opt-sched/Scheduler/graph_trans.h index b820f0b6..4cd28d4d 100644 --- a/include/opt-sched/Scheduler/graph_trans.h +++ b/include/opt-sched/Scheduler/graph_trans.h @@ -91,6 +91,13 @@ class StaticNodeSupTrans : public GraphTrans { static bool isNodeSuperior(DataDepGraph &DDG, int A, int B); + struct Statistics { + int NumEdgesAdded = 0; + int NumEdgesRemoved = 0; + }; + static void removeRedundantEdges(DataDepGraph &DDG, int i, int j, + Statistics &Stats); + private: // Are multiple passes enabled. bool IsMultiPass; @@ -103,8 +110,9 @@ class StaticNodeSupTrans : public GraphTrans { // Check if there is superiority involving nodes A and B. If yes, choose which // edge to add. - // Returns true if a superior edge was added. - bool TryAddingSuperiorEdge_(SchedInstruction *nodeA, SchedInstruction *nodeB); + // Returns the added edge if added, else nullptr + GraphEdge *TryAddingSuperiorEdge_(SchedInstruction *nodeA, + SchedInstruction *nodeB); // Keep trying to find superior nodes until none can be found or there are no // more independent nodes. diff --git a/include/opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h b/include/opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h new file mode 100644 index 00000000..e1d1acd0 --- /dev/null +++ b/include/opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h @@ -0,0 +1,31 @@ +/******************************************************************************* +Description: Implement graph transformations to be applied before scheduling. +Author: Justin Bassett +Created: Aug. 2020 +Last Update: Aug. 2020 +*******************************************************************************/ + +#ifndef OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H +#define OPTSCHED_BASIC_GRAPH_TRANS_ILP_OCCUPANCY_PRESERVING_H + +#include "opt-sched/Scheduler/graph_trans.h" + +namespace llvm { +namespace opt_sched { + +// Node superiority Occupancy preserving ILP graph transformation. +class StaticNodeSupOccupancyPreservingILPTrans : public GraphTrans { +public: + StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *dataDepGraph); + + const char *Name() const override { + return "occupancy-preserving-ilp.nodesup"; + } + + FUNC_RESULT ApplyTrans() override; +}; + +} // namespace opt_sched +} // namespace llvm + +#endif diff --git a/include/opt-sched/Scheduler/ready_list.h b/include/opt-sched/Scheduler/ready_list.h index 0a281dff..f580834d 100644 --- a/include/opt-sched/Scheduler/ready_list.h +++ b/include/opt-sched/Scheduler/ready_list.h @@ -86,6 +86,13 @@ class ReadyList { // Constructs the priority-list key based on the schemes listed in prirts_. unsigned long CmputKey_(SchedInstruction *inst, bool isUpdate, bool &changed); + template + void ForEachReadyInstruction(InstructionVisitor &&visitor) const { + for (const SchedInstruction &Inst : prirtyLst_) { + visitor(Inst); + } + } + private: // An ordered vector of priorities SchedPriorities prirts_; diff --git a/include/opt-sched/Scheduler/register.h b/include/opt-sched/Scheduler/register.h index 3c43c1a0..35a1a3f2 100644 --- a/include/opt-sched/Scheduler/register.h +++ b/include/opt-sched/Scheduler/register.h @@ -13,6 +13,7 @@ Last Update: Jun. 2017 #include "opt-sched/Scheduler/sched_basic_data.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" #include using namespace llvm; @@ -87,6 +88,8 @@ class Register { bool IsInPossibleInterval(const SchedInstruction *inst) const; const InstSetType &GetPossibleLiveInterval() const; + void resetLiveInterval(); + private: int16_t type_; int num_; @@ -124,10 +127,75 @@ class Register { // Represents a file of registers of a certain type and tracks their usages. class RegisterFile { + template ::type> + class RegisterFileIterator + : public llvm::iterator_facade_base, + std::random_access_iterator_tag, R> { + + public: + RegisterFileIterator() = default; + explicit RegisterFileIterator(const RegisterFile &File, int Index) + : File(&File), Index(Index) {} + + template ::type = 0> + RegisterFileIterator(const RegisterFileIterator &Rhs) noexcept + : File(Rhs.File), Index(Rhs.Index) {} + + bool operator==(const RegisterFileIterator &Rhs) const { + assert(File == Rhs.File); + return Index == Rhs.Index; + } + + bool operator<(const RegisterFileIterator &Rhs) const { + assert(File == Rhs.File); + return Index < Rhs.Index; + } + + std::ptrdiff_t operator-(const RegisterFileIterator &Rhs) const { + return Index - Rhs.Index; + } + + R &operator*() const { return *File->GetReg(Index); } + + RegisterFileIterator &operator++() { + ++Index; + return *this; + } + + RegisterFileIterator &operator--() { + --Index; + return *this; + } + + RegisterFileIterator &operator+=(std::ptrdiff_t n) { + Index += n; + return *this; + } + + RegisterFileIterator &operator-=(std::ptrdiff_t n) { + Index -= n; + return *this; + } + + private: + const RegisterFile *File = nullptr; + int Index = 0; + }; + public: + using iterator = RegisterFileIterator; + using const_iterator = RegisterFileIterator; + RegisterFile(); ~RegisterFile(); + iterator begin() { return iterator(*this, 0); } + iterator end() { return iterator(*this, GetRegCnt()); } + const_iterator begin() const { return const_iterator(*this, 0); } + const_iterator end() const { return const_iterator(*this, GetRegCnt()); } + int GetRegCnt() const; void SetRegCnt(int regCnt); @@ -157,7 +225,7 @@ class RegisterFile { private: int16_t regType_; int physRegCnt_; - mutable SmallVector, 8> Regs; + SmallVector, 8> Regs; }; } // namespace opt_sched diff --git a/include/opt-sched/Scheduler/sched_basic_data.h b/include/opt-sched/Scheduler/sched_basic_data.h index 5602fdc3..075acd43 100644 --- a/include/opt-sched/Scheduler/sched_basic_data.h +++ b/include/opt-sched/Scheduler/sched_basic_data.h @@ -425,9 +425,9 @@ class SchedInstruction : public GraphNode { void ComputeAdjustedUseCnt(SchedInstruction *inst); int16_t CmputLastUseCnt(); - int16_t GetLastUseCnt() { return lastUseCnt_; } + int16_t GetLastUseCnt() const { return lastUseCnt_; } - InstType GetCrtclPathFrmRoot() { return crtclPathFrmRoot_; } + InstType GetCrtclPathFrmRoot() const { return crtclPathFrmRoot_; } friend class SchedRange; diff --git a/include/opt-sched/Scheduler/sched_region.h b/include/opt-sched/Scheduler/sched_region.h index 88115aaa..6398499c 100644 --- a/include/opt-sched/Scheduler/sched_region.h +++ b/include/opt-sched/Scheduler/sched_region.h @@ -39,6 +39,31 @@ enum class BLOCKS_TO_KEEP { ALL }; +// Where to perform graph transformations; flag enum +enum class GT_POSITION : uint32_t { + NONE = 0x0, + // Run on all blocks before the heuristic + BEFORE_HEURISTIC = 0x1, + // Run only if the heuristic scheduler doesn't prove the schedule optimal + AFTER_HEURISTIC = 0x2, +}; + +inline GT_POSITION operator|(GT_POSITION lhs, GT_POSITION rhs) { + return (GT_POSITION)((uint32_t)lhs | (uint32_t)rhs); +} + +inline GT_POSITION operator&(GT_POSITION lhs, GT_POSITION rhs) { + return (GT_POSITION)((uint32_t)lhs & (uint32_t)rhs); +} + +inline GT_POSITION &operator|=(GT_POSITION &lhs, GT_POSITION rhs) { + return lhs = lhs | rhs; +} + +inline GT_POSITION &operator&=(GT_POSITION &lhs, GT_POSITION rhs) { + return lhs = lhs & rhs; +} + class ListScheduler; class SchedRegion { @@ -48,7 +73,8 @@ class SchedRegion { int16_t sigHashSize, LB_ALG lbAlg, SchedPriorities hurstcPrirts, SchedPriorities enumPrirts, bool vrfySched, Pruning PruningStrategy, SchedulerType HeurSchedType, - SPILL_COST_FUNCTION spillCostFunc = SCF_PERP); + SPILL_COST_FUNCTION spillCostFunc, + GT_POSITION GraphTransPosition); // Destroys the region. Must be overriden by child classes. virtual ~SchedRegion() {} @@ -205,6 +231,9 @@ class SchedRegion { // TODO(max): Document. int16_t sigHashSize_; + // Where to apply graph transformations + GT_POSITION GraphTransPosition_; + // The pruning technique to use for this region. Pruning prune_; @@ -224,6 +253,11 @@ class SchedRegion { // The best schedule found so far (may be heuristic or enumerator generated) InstSchedule *bestSched_; + void CalculateUpperBounds(bool BbSchedulerEnabled); + void CalculateLowerBounds(bool BbSchedulerEnabled); + + bool IsLowerBoundSet_ = false; + bool IsUpperBoundSet_ = false; // TODO(max): Document. InstCount schedLwrBound_; // TODO(max): Document. @@ -244,6 +278,8 @@ class SchedRegion { // TODO(max): Document. InstCount crntSlotNum_; + bool needsTransitiveClosure(Milliseconds rgnTimeout) const; + // protected accessors: SchedulerType GetHeuristicSchedulerType() const { return HeurSchedType_; } @@ -324,6 +360,13 @@ class SchedRegion { FUNC_RESULT runACO(InstSchedule *ReturnSched, InstSchedule *InitSched, bool IsPostBB); + + FUNC_RESULT applyGraphTransformations(bool BbScheduleEnabled, + InstSchedule *heuristicSched, + bool &isLstOptml, + InstSchedule *&bestSched); + FUNC_RESULT applyGraphTransformation(GraphTrans *GT); + void updateBoundsAfterGraphTransformations(bool BbSchedulerEnabled); }; } // namespace opt_sched diff --git a/include/opt-sched/Scheduler/utilities.h b/include/opt-sched/Scheduler/utilities.h index f38cd2d7..771b45e3 100644 --- a/include/opt-sched/Scheduler/utilities.h +++ b/include/opt-sched/Scheduler/utilities.h @@ -22,7 +22,14 @@ uint16_t clcltBitsNeededToHoldNum(uint64_t value); Milliseconds GetProcessorTime(); // Returns a reference to an object that is supposed to initialized with the // start time of the process -extern std::chrono::high_resolution_clock::time_point startTime; +extern std::chrono::steady_clock::time_point startTime; + +// Executes the function, returning the number of milliseconds it took to do so. +template Milliseconds countMillisToExecute(F &&fn) { + const Milliseconds Start = GetProcessorTime(); + fn(); + return GetProcessorTime() - Start; +} } // namespace Utilities inline uint16_t Utilities::clcltBitsNeededToHoldNum(uint64_t value) { @@ -36,7 +43,7 @@ inline uint16_t Utilities::clcltBitsNeededToHoldNum(uint64_t value) { } inline Milliseconds Utilities::GetProcessorTime() { - auto currentTime = std::chrono::high_resolution_clock::now(); + auto currentTime = std::chrono::steady_clock::now(); std::chrono::duration elapsed = currentTime - startTime; return elapsed.count(); } diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index f831478b..3e2045b1 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -8,6 +8,7 @@ set(OPTSCHED_SRCS Scheduler/aco.cpp Scheduler/graph.cpp Scheduler/graph_trans.cpp Scheduler/graph_trans_ilp.cpp + Scheduler/graph_trans_ilp_occupancy_preserving.cpp Scheduler/hist_table.cpp Scheduler/list_sched.cpp Scheduler/logger.cpp diff --git a/lib/Scheduler/bb_spill.cpp b/lib/Scheduler/bb_spill.cpp index 53a50975..f2454aff 100644 --- a/lib/Scheduler/bb_spill.cpp +++ b/lib/Scheduler/bb_spill.cpp @@ -11,6 +11,7 @@ #include "opt-sched/Scheduler/relaxed_sched.h" #include "opt-sched/Scheduler/stats.h" #include "opt-sched/Scheduler/utilities.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include @@ -38,10 +39,11 @@ BBWithSpill::BBWithSpill(const OptSchedTarget *OST_, DataDepGraph *dataDepGraph, Pruning PruningStrategy, bool SchedForRPOnly, bool enblStallEnum, int SCW, SPILL_COST_FUNCTION spillCostFunc, - SchedulerType HeurSchedType) + SchedulerType HeurSchedType, + GT_POSITION GraphTransPosition) : SchedRegion(OST_->MM, dataDepGraph, rgnNum, sigHashSize, lbAlg, hurstcPrirts, enumPrirts, vrfySched, PruningStrategy, - HeurSchedType, spillCostFunc), + HeurSchedType, spillCostFunc, GraphTransPosition), OST(OST_) { enumrtr_ = NULL; optmlSpillCost_ = INVALID_VALUE; @@ -164,22 +166,25 @@ void BBWithSpill::CmputSchedUprBound_() { static InstCount ComputeSLILStaticLowerBound(int64_t regTypeCnt_, RegisterFile *regFiles_, DataDepGraph *dataDepGraph_) { + // Reset the live ranges so that we compute the lower bound correctly if we've + // already computed it before. + const auto RegFiles = llvm::makeMutableArrayRef(regFiles_, regTypeCnt_); + for (RegisterFile &File : RegFiles) { + for (Register &Reg : File) { + Reg.resetLiveInterval(); + } + } + // (Chris): To calculate a naive lower bound of the SLIL, count all the defs // and uses for each register. int naiveLowerBound = 0; - for (int i = 0; i < regTypeCnt_; ++i) { - for (int j = 0; j < regFiles_[i].GetRegCnt(); ++j) { - const auto ® = regFiles_[i].GetReg(j); - for (const auto &instruction : reg->GetDefList()) { - if (reg->AddToInterval(instruction)) { - ++naiveLowerBound; - } - } - for (const auto &instruction : reg->GetUseList()) { - if (reg->AddToInterval(instruction)) { - ++naiveLowerBound; - } - } + for (RegisterFile &File : RegFiles) { + for (Register &Reg : File) { + const auto added_to_interval = [&](const SchedInstruction *instruction) { + return Reg.AddToInterval(instruction); + }; + naiveLowerBound += llvm::count_if(Reg.GetDefList(), added_to_interval) + + llvm::count_if(Reg.GetUseList(), added_to_interval); } } diff --git a/lib/Scheduler/enumerator.cpp b/lib/Scheduler/enumerator.cpp index bc6033e3..deeaac5f 100644 --- a/lib/Scheduler/enumerator.cpp +++ b/lib/Scheduler/enumerator.cpp @@ -982,6 +982,12 @@ bool Enumerator::FindNxtFsblBrnch_(EnumTreeNode *&newNode) { // rdyLst_->Print(Logger::GetLogStream()); stats::maxReadyListSize.SetMax(rdyInstCnt); + + rdyLst_->ForEachReadyInstruction([](const SchedInstruction &Inst) { + Logger::Info("Inst %d has: LUC %d CP %d NID %d", Inst.GetNum(), + Inst.GetLastUseCnt(), Inst.GetCrtclPath(DIR_BKWRD), + Inst.GetNodeID()); + }); #endif if (crntBrnchNum == 0 && SchedForRPOnly_) diff --git a/lib/Scheduler/graph_trans.cpp b/lib/Scheduler/graph_trans.cpp index 7d7833cf..32dfa2de 100644 --- a/lib/Scheduler/graph_trans.cpp +++ b/lib/Scheduler/graph_trans.cpp @@ -76,33 +76,24 @@ StaticNodeSupTrans::StaticNodeSupTrans(DataDepGraph *dataDepGraph, IsMultiPass = IsMultiPass_; } -static void addRPSuperiorEdge(DataDepGraph &DDG, SchedInstruction *A, - SchedInstruction *B) { +static GraphEdge *addRPSuperiorEdge(DataDepGraph &DDG, SchedInstruction *A, + SchedInstruction *B) { DEBUG_LOG("Node %d is superior to node %d", A->GetNum(), B->GetNum()); - addSuperiorEdge(DDG, A, B); + return addSuperiorEdge(DDG, A, B); } -bool StaticNodeSupTrans::TryAddingSuperiorEdge_(SchedInstruction *nodeA, - SchedInstruction *nodeB) { - // Return this flag which designates whether an edge was added. - bool edgeWasAdded = false; - +GraphEdge *StaticNodeSupTrans::TryAddingSuperiorEdge_(SchedInstruction *nodeA, + SchedInstruction *nodeB) { if (nodeA->GetNodeID() > nodeB->GetNodeID()) std::swap(nodeA, nodeB); if (NodeIsSuperior_(nodeA, nodeB)) { - addRPSuperiorEdge(*GetDataDepGraph_(), nodeA, nodeB); - edgeWasAdded = true; + return addRPSuperiorEdge(*GetDataDepGraph_(), nodeA, nodeB); } else if (NodeIsSuperior_(nodeB, nodeA)) { - addRPSuperiorEdge(*GetDataDepGraph_(), nodeB, nodeA); - // Swap nodeIDs - // int tmp = nodeA->GetNodeID(); - // nodeA->SetNodeID(nodeB->GetNodeID()); - // nodeB->SetNodeID(tmp); - edgeWasAdded = true; + return addRPSuperiorEdge(*GetDataDepGraph_(), nodeB, nodeA); } - return edgeWasAdded; + return nullptr; } FUNC_RESULT StaticNodeSupTrans::ApplyTrans() { @@ -110,8 +101,7 @@ FUNC_RESULT StaticNodeSupTrans::ApplyTrans() { DataDepGraph *graph = GetDataDepGraph_(); // A list of independent nodes. std::list> indepNodes; - bool didAddEdge = false; - int NumAdded = 0; + Statistics stats; Logger::Event("GraphTransRPNodeSuperiority"); // For the first pass visit all nodes. Add sets of independent nodes to a @@ -126,20 +116,23 @@ FUNC_RESULT StaticNodeSupTrans::ApplyTrans() { DEBUG_LOG("Checking nodes %d:%d", i, j); if (areNodesIndependent(nodeA, nodeB)) { - didAddEdge = TryAddingSuperiorEdge_(nodeA, nodeB); + GraphEdge *edge = TryAddingSuperiorEdge_(nodeA, nodeB); // If the nodes are independent and no superiority was found add the // nodes to a list for // future passes. - if (!didAddEdge) + if (!edge) indepNodes.push_back(std::make_pair(nodeA, nodeB)); - else - NumAdded++; + else { + stats.NumEdgesAdded++; + removeRedundantEdges(*graph, edge->from->GetNum(), edge->to->GetNum(), + stats); + } } } } Logger::Event("GraphTransRPNodeSuperiorityFinished", "superior_edges", - NumAdded); + stats.NumEdgesAdded, "removed_edges", stats.NumEdgesRemoved); if (IsMultiPass) nodeMultiPass_(indepNodes); @@ -311,3 +304,68 @@ void StaticNodeSupTrans::nodeMultiPass_( } } } + +//////////////////////////////////// +// Removal of redundant edges: +static bool isRedundant(SchedInstruction *NodeI, SchedInstruction *NodeJ, + GraphEdge &e) { + // If this is the edge we just added, it's not redundant + if (e.from == NodeI && e.to == NodeJ) { + return false; + } + + return NodeJ->IsRcrsvScsr(e.to); +} + +static LinkedList::iterator +removeEdge(LinkedList &Succs, LinkedList::iterator it, + StaticNodeSupTrans::Statistics &stats) { + GraphEdge &e = *it; + it = Succs.RemoveAt(it); + e.to->RemovePredFrom(e.from); + DEBUG_LOG(" Deleting GraphEdge* at %p: (%zu, %zu)", (void *)&e, + e.from->GetNum(), e.to->GetNum()); + delete &e; + ++stats.NumEdgesRemoved; + + return it; +} + +void StaticNodeSupTrans::removeRedundantEdges(DataDepGraph &DDG, // + int i, int j, Statistics &stats) { + DEBUG_LOG(" Removing redundant edges"); + SchedInstruction *NodeI = DDG.GetInstByIndx(i); + SchedInstruction *NodeJ = DDG.GetInstByIndx(j); + + // Check edges from I itself, since GetRecursivePredecessors() doesn't include + // I. + { + LinkedList &ISuccs = NodeI->GetSuccessors(); + for (auto it = ISuccs.begin(); it != ISuccs.end();) { + if (isRedundant(NodeI, NodeJ, *it)) { + it = removeEdge(ISuccs, it, stats); + } else { + ++it; + } + } + } + + // Check edges from a predecessor of I to a successor of J (or J itself). + // We don't need to explicitly check J itself in a separate step because + // the isRedundant() check appropriately considers edges ending at J. + for (GraphNode &Pred : *NodeI->GetRecursivePredecessors()) { + LinkedList &PSuccs = Pred.GetSuccessors(); + + for (auto it = PSuccs.begin(); it != PSuccs.end();) { + if (isRedundant(NodeI, NodeJ, *it)) { + it = removeEdge(PSuccs, it, stats); + } else { + ++it; + } + } + } + + // Don't need to repeat for successors of J, as those are already considered + // by the prior loops. We could have checked the successors of J instead of + // predecessors of I, but we don't need to explicitly check both. +} diff --git a/lib/Scheduler/graph_trans_ilp.cpp b/lib/Scheduler/graph_trans_ilp.cpp index 14736687..cb8ed2d6 100644 --- a/lib/Scheduler/graph_trans_ilp.cpp +++ b/lib/Scheduler/graph_trans_ilp.cpp @@ -315,7 +315,24 @@ static bool isRedundant(SchedInstruction *NodeI, SchedInstruction *NodeJ, const size_t From = castUnsigned(e.from->GetNum()); const size_t To = castUnsigned(e.to->GetNum()); - return NodeJ->IsRcrsvScsr(e.to) && e.label <= DistanceTable[{From, To}]; + const size_t I = castUnsigned(NodeI->GetNum()); + const size_t J = castUnsigned(NodeJ->GetNum()); + + // If this edge is not (I, J) and there is a path through From -> (I, J) -> To + // which is at least as long as this edge's weight, then this edge is + // redundant. + // This is because this path implies that this edge is a transitive edge and + // the length condition shows that this edge doesn't affect the critical path + // distances. + + // Note: DistanceTable[{I, J}] should always be 0 at this point, but with + // resource edges, this may not necessarily be true. + // Note: we don't need to saturate at MaxLatency because it doesn't affect the + // answer. + const int DistThroughIJ = + DistanceTable[{From, I}] + DistanceTable[{I, J}] + DistanceTable[{J, To}]; + + return NodeJ->IsRcrsvScsr(e.to) && e.label <= DistThroughIJ; } static LinkedList::iterator @@ -324,7 +341,8 @@ removeEdge(LinkedList &Succs, LinkedList::iterator it, GraphEdge &e = *it; it = Succs.RemoveAt(it); e.to->RemovePredFrom(e.from); - DEBUG_LOG(" Deleting GraphEdge* at %p: (%zu, %zu)", (void *)&e, From, To); + DEBUG_LOG(" Deleting GraphEdge* at %p: (%zu, %zu)", (void *)&e, + e.from->GetNum(), e.to->GetNum()); delete &e; ++stats.NumEdgesRemoved; diff --git a/lib/Scheduler/graph_trans_ilp_occupancy_preserving.cpp b/lib/Scheduler/graph_trans_ilp_occupancy_preserving.cpp new file mode 100644 index 00000000..76f708e5 --- /dev/null +++ b/lib/Scheduler/graph_trans_ilp_occupancy_preserving.cpp @@ -0,0 +1,80 @@ +#include "opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h" + +#include "opt-sched/Scheduler/graph_trans_ilp.h" +#include "opt-sched/Scheduler/logger.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include +#include + +using namespace llvm::opt_sched; + +// #define IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS + +#ifdef IS_DEBUG_OCCUPANCY_PRESERVING_ILP_GRAPH_TRANS +#define DEBUG_LOG(...) Logger::Info(__VA_ARGS__) +#else +#define DEBUG_LOG(...) static_cast(0) +#endif + +using ILP = StaticNodeSupILPTrans; +using RP = StaticNodeSupTrans; + +StaticNodeSupOccupancyPreservingILPTrans:: + StaticNodeSupOccupancyPreservingILPTrans(DataDepGraph *DDG) + : GraphTrans(DDG) {} + +FUNC_RESULT StaticNodeSupOccupancyPreservingILPTrans::ApplyTrans() { + Logger::Event("GraphTransOccupancyPreservingILPNodeSuperiority"); + + DataDepGraph &DDG = *GetDataDepGraph_(); + assert(GetNumNodesInGraph_() == DDG.GetNodeCnt()); + + auto Data_ = ILP::createData(DDG); + ILP::Data &Data = Data_.getData(); + + int NumPassedILP = 0; + int NumFailedRP = 0; + + DEBUG_LOG("Starting main algorithm"); + while (!Data.SuperiorNodesList.empty()) { + auto ij = Data.SuperiorNodesList.pop_back_val(); + const int i = ij.first; + const int j = ij.second; + DEBUG_LOG("Considering adding a superior edge (%d, %d)", i, j); + + if (!areNodesIndependent(DDG.GetInstByIndx(i), DDG.GetInstByIndx(j))) { + DEBUG_LOG("Skipping (%d, %d) because nodes are no longer independent\n", + i, j); + continue; + } + ++NumPassedILP; + if (!RP::isNodeSuperior(DDG, i, j)) { + DEBUG_LOG("(%d, %d) failed the occupancy-preserving conditions\n", i, j); + ++NumFailedRP; + continue; + } + + ILP::addZeroLatencyEdge(Data, i, j); + ILP::addNecessaryResourceEdges(Data, i, j); + + ILP::updateDistanceTable(Data, i, j); + // ILP redundant edges are also redundant from RP point of view. + // This is because ILP redundant edges are transitive edges with more + // conditions met, and the RP point of view considers transitive edges to be + // redundant. + ILP::removeRedundantEdges(Data, i, j); + + DEBUG_LOG("Finished iteration for (%d, %d)\n", i, j); + } + + Logger::Event("GraphTransOccupancyPreservingILPNodeSuperiorityFinished", + "superior_edges", Data.Stats.NumEdgesAdded, // + "removed_edges", Data.Stats.NumEdgesRemoved, // + "resource_edges", Data.Stats.NumResourceEdgesAdded, // + "passed_ilp", NumPassedILP, // + "failed_rp", NumFailedRP); + + return RES_SUCCESS; +} diff --git a/lib/Scheduler/register.cpp b/lib/Scheduler/register.cpp index 85d638aa..ddab53de 100644 --- a/lib/Scheduler/register.cpp +++ b/lib/Scheduler/register.cpp @@ -125,6 +125,11 @@ const Register::InstSetType &Register::GetPossibleLiveInterval() const { return possibleLiveIntervalSet_; } +void Register::resetLiveInterval() { + liveIntervalSet_.clear(); + possibleLiveIntervalSet_.clear(); +} + Register::Register(int16_t type, int num, int physicalNumber) { type_ = type; num_ = num; diff --git a/lib/Scheduler/sched_region.cpp b/lib/Scheduler/sched_region.cpp index 478c3cdd..4e4595b2 100644 --- a/lib/Scheduler/sched_region.cpp +++ b/lib/Scheduler/sched_region.cpp @@ -79,7 +79,8 @@ SchedRegion::SchedRegion(MachineModel *machMdl, DataDepGraph *dataDepGraph, SchedPriorities hurstcPrirts, SchedPriorities enumPrirts, bool vrfySched, Pruning PruningStrategy, SchedulerType HeurSchedType, - SPILL_COST_FUNCTION spillCostFunc) { + SPILL_COST_FUNCTION spillCostFunc, + GT_POSITION GraphTransPosition) { machMdl_ = machMdl; dataDepGraph_ = dataDepGraph; rgnNum_ = rgnNum; @@ -110,6 +111,8 @@ SchedRegion::SchedRegion(MachineModel *machMdl, DataDepGraph *dataDepGraph, DumpDDGs_ = GetDumpDDGs(); DDGDumpPath_ = GetDDGDumpPath(); + + GraphTransPosition_ = GraphTransPosition; } void SchedRegion::UseFileBounds_() { @@ -171,6 +174,11 @@ static void dumpDDG(DataDepGraph *DDG, llvm::StringRef DDGDumpPath, std::fclose(f); } +bool SchedRegion::needsTransitiveClosure(Milliseconds rgnTimeout) const { + return isBbEnabled(SchedulerOptions::getInstance(), rgnTimeout) || + !dataDepGraph_->GetGraphTrans()->empty() || needsSLIL(); +} + FUNC_RESULT SchedRegion::FindOptimalSchedule( Milliseconds rgnTimeout, Milliseconds lngthTimeout, bool &isLstOptml, InstCount &bestCost, InstCount &bestSchedLngth, InstCount &hurstcCost, @@ -202,7 +210,7 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( bool AcoAfterEnum = false; // Do we need to compute the graph's transitive closure? - bool needTransitiveClosure = false; + const bool NeedTransitiveClosure = needsTransitiveClosure(rgnTimeout); // Algorithm run order: // 1) Heuristic Scheduler @@ -244,11 +252,10 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( stats::problemSize.Record(dataDepGraph_->GetInstCnt()); - const auto *GraphTransformations = dataDepGraph_->GetGraphTrans(); - if (BbSchedulerEnabled || GraphTransformations->size() > 0 || needsSLIL()) - needTransitiveClosure = true; - - rslt = dataDepGraph_->SetupForSchdulng(needTransitiveClosure); + Logger::Event("RunningSetupForScheduling", // + "need_transitive_closure", NeedTransitiveClosure); + rslt = dataDepGraph_->SetupForSchdulng(NeedTransitiveClosure); + Logger::Event("RunningSetupForSchedulingFinished"); if (rslt != RES_SUCCESS) { Logger::Info("Invalid input DAG"); return rslt; @@ -258,34 +265,28 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( dumpDDG(dataDepGraph_, DDGDumpPath_); } - // Apply graph transformations - for (auto > : *GraphTransformations) { - rslt = GT->ApplyTrans(); + const bool IsSeqListSched = GetHeuristicSchedulerType() == SCHED_SEQ; - if (DumpDDGs_) { - dumpDDG(dataDepGraph_, DDGDumpPath_, GT->Name()); - } + if ((GraphTransPosition_ & GT_POSITION::BEFORE_HEURISTIC) != GT_POSITION::NONE + // The sequential list scheduler can "find" schedules invalidated by graph + // transformations. Delay until _after_ it. + && !IsSeqListSched) { + rslt = applyGraphTransformations(BbSchedulerEnabled, nullptr, isLstOptml, + bestSched); if (rslt != RES_SUCCESS) return rslt; - - // Update graph after each transformation - rslt = dataDepGraph_->UpdateSetupForSchdulng(needTransitiveClosure); - if (rslt != RES_SUCCESS) { - Logger::Info("Invalid DAG after graph transformations"); - return rslt; - } } SetupForSchdulng_(); - CmputAbslutUprBound_(); + CalculateUpperBounds(BbSchedulerEnabled); schedLwrBound_ = dataDepGraph_->GetSchedLwrBound(); // Step #1: Find the heuristic schedule if enabled. // Note: Heuristic scheduler is required for the two-pass scheduler // to use the sequential list scheduler which inserts stalls into // the schedule found in the first pass. - if (HeuristicSchedulerEnabled || IsSecondPass()) { + if (HeuristicSchedulerEnabled || IsSeqListSched) { Milliseconds hurstcStart = Utilities::GetProcessorTime(); lstSched = new InstSchedule(machMdl_, dataDepGraph_, vrfySched_); @@ -310,21 +311,39 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( // to the DDG. Some mutations were adding artificial edges which caused a // conflict with the sequential scheduler. Therefore, wait until the // sequential scheduler is done before adding artificial edges. - if (IsSecondPass() && EnableMutations) { + if (IsSeqListSched && EnableMutations) { static_cast(dataDepGraph_)->addArtificialEdges(); - rslt = dataDepGraph_->UpdateSetupForSchdulng(needTransitiveClosure); + rslt = dataDepGraph_->UpdateSetupForSchdulng(NeedTransitiveClosure); if (rslt != RES_SUCCESS) { Logger::Info("Invalid DAG after adding artificial cluster edges"); return rslt; } } + if ((GraphTransPosition_ & GT_POSITION::BEFORE_HEURISTIC) != GT_POSITION::NONE + // Run GT now that the sequential list scheduler is done. + && IsSeqListSched) { + rslt = applyGraphTransformations(BbSchedulerEnabled, nullptr, isLstOptml, + bestSched); + + if (rslt != RES_SUCCESS) + return rslt; + } + // This must be done after SetupForSchdulng() or UpdateSetupForSchdulng() to // avoid resetting lower bound values. - if (!BbSchedulerEnabled) - CmputAndSetCostLwrBound(); - else - CmputLwrBounds_(false); + const Milliseconds LbElapsedTime = Utilities::countMillisToExecute( + [&] { CalculateLowerBounds(BbSchedulerEnabled); }); + + // Log the lower bound on the cost, allowing tools reading the log to compare + // absolute rather than relative costs. + Logger::Event("CostLowerBound", "cost", costLwrBound_, "elapsed", + LbElapsedTime); + // TODO(justin): Remove once relevant scripts have been updated: + // plaidbench-validation-test.py, runspec-wrapper-SLIL.py + Logger::Info("Lower bound of cost before scheduling: %d", costLwrBound_); + Logger::Info("Lower bound of spill cost before scheduling: %d", + SpillCostLwrBound_); // Cost calculation must be below lower bounds calculation if (HeuristicSchedulerEnabled || IsSecondPass()) { @@ -351,7 +370,8 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( FinishHurstc_(); Logger::Event("HeuristicResult", "length", heuristicScheduleLength, // - "spill_cost", lstSched->GetSpillCost(), "cost", hurstcCost_); + "spill_cost", lstSched->GetSpillCost(), "cost", hurstcCost_, + "elapsed", hurstcTime); // TODO(justin): Remove once relevant scripts have been updated: // get-sched-length.py, runspec-wrapper-SLIL.py Logger::Info( @@ -407,14 +427,14 @@ FUNC_RESULT SchedRegion::FindOptimalSchedule( } #endif - // Log the lower bound on the cost, allowing tools reading the log to compare - // absolute rather than relative costs. - Logger::Event("CostLowerBound", "cost", costLwrBound_); - // TODO(justin): Remove once relevant scripts have been updated: - // plaidbench-validation-test.py, runspec-wrapper-SLIL.py - Logger::Info("Lower bound of cost before scheduling: %d", costLwrBound_); - Logger::Info("Lower bound of spill cost before scheduling: %d", - SpillCostLwrBound_); + if (!isLstOptml && (GraphTransPosition_ & GT_POSITION::AFTER_HEURISTIC) != + GT_POSITION::NONE) { + rslt = applyGraphTransformations(BbSchedulerEnabled, lstSched, isLstOptml, + bestSched); + + if (rslt != RES_SUCCESS) + return rslt; + } // Step #2: Use ACO to find a schedule if enabled and no optimal schedule is // yet to be found. @@ -1019,3 +1039,144 @@ FUNC_RESULT SchedRegion::runACO(InstSchedule *ReturnSched, delete AcoSchdulr; return Rslt; } + +void SchedRegion::updateBoundsAfterGraphTransformations( + bool BbSchedulerEnabled) { + const InstCount OldSchedLwrBound = schedLwrBound_; + const InstCount OldSchedUprBound = schedUprBound_; + const InstCount OldCostLwrBound = costLwrBound_; + + // Only recalculate if we've already computed them. + // If not, we'll already compute these bounds later on before scheduling. + if (IsUpperBoundSet_) + CalculateUpperBounds(BbSchedulerEnabled); + if (IsLowerBoundSet_) { + const Milliseconds LbElapsedTime = Utilities::countMillisToExecute( + [&] { CalculateLowerBounds(BbSchedulerEnabled); }); + + // Log the new lower bound on the cost, allowing tools reading the log to + // compare absolute rather than relative costs. + Logger::Event("CostLowerBound", "cost", costLwrBound_, "elapsed", + LbElapsedTime); + } + + // Some validation to try to catch bugs + if (OldSchedLwrBound > schedLwrBound_) { + Logger::Error("schedLwrBound got worse after graph transformations!"); + // Probably a bug, but still take the most accurate value + schedLwrBound_ = OldSchedLwrBound; + } + if (OldSchedUprBound < schedUprBound_) { + Logger::Error( + "schedUprBound got worse after graph transformations! (%d -> %d)", + OldSchedUprBound, schedUprBound_); + // Probably a bug, but still take the most accurate value + schedUprBound_ = OldSchedUprBound; + } + if (OldCostLwrBound > costLwrBound_) { + Logger::Error("costLwrBound got worse after graph transformations!"); + // Probably a bug, but still take the most accurate value + costLwrBound_ = OldCostLwrBound; + } +} + +FUNC_RESULT SchedRegion::applyGraphTransformation(GraphTrans *GT) { + DataDepGraph *DDG = dataDepGraph_; + FUNC_RESULT result = GT->ApplyTrans(); + + if (result != RES_SUCCESS) + return result; + + // Update graph after each transformation + result = DDG->UpdateSetupForSchdulng(/* need transitive closure? = */ true); + if (result != RES_SUCCESS) { + Logger::Error("Invalid DAG after graph transformations"); + return result; + } + + return result; +} + +FUNC_RESULT +SchedRegion::applyGraphTransformations(bool BbSchedulerEnabled, + InstSchedule *heuristicSched, + bool &isLstOptml, + InstSchedule *&bestSched) { + FUNC_RESULT result = RES_SUCCESS; + + auto &GraphTransformations = *dataDepGraph_->GetGraphTrans(); + if (GraphTransformations.empty()) + return result; + + Logger::Event("GraphTransformationsStart"); + + for (auto > : GraphTransformations) { + result = applyGraphTransformation(GT.get()); + + if (result != RES_SUCCESS) + return result; + + if (DumpDDGs_) { + updateBoundsAfterGraphTransformations(BbSchedulerEnabled); + dumpDDG(dataDepGraph_, DDGDumpPath_, GT->Name()); + } + } + + updateBoundsAfterGraphTransformations(BbSchedulerEnabled); + + // We don't change the heuristic schedule, but recompute its cost. + // Note that the heuristic schedule can have a schedule order invalidated by + // the graph transformations, but this is OKAY because: + // - It's still a valid schedule for the region (i.e. before graph + // transformations). + // - The B&B code only compares against the cost of the heuristic schedule, + // so B&B won't be messed up by the "invalid" schedule. + // - The cost calculation doesn't depend on the graph structure, just the + // schedule itself. + // - The only part of cost calculation that _does_ depend on the graph + // structure is the lower bounds, which are abstracted into a number, so it + // is okay. + if (heuristicSched) { + const InstCount heuristicScheduleLength = heuristicSched->GetCrntLngth(); + InstCount hurstcExecCost; + // Compute cost for Heuristic list scheduler, this must be called before + // calling GetCost() on the InstSchedule instance. + CmputNormCost_(heuristicSched, CCM_DYNMC, hurstcExecCost, true); + hurstcCost_ = heuristicSched->GetCost(); + + // Get unweighted spill cost for Heurstic list scheduler + HurstcSpillCost_ = heuristicSched->GetSpillCost(); + + // This schedule is optimal so ACO will not be run + // so set bestSched here. + if (hurstcCost_ == 0) { + isLstOptml = true; + bestSched = bestSched_ = heuristicSched; + bestSchedLngth_ = heuristicScheduleLength; + bestCost_ = hurstcCost_; + BestSpillCost_ = HurstcSpillCost_; + } + + Logger::Event("HeuristicResult", "length", heuristicScheduleLength, // + "spill_cost", heuristicSched->GetSpillCost(), "cost", + hurstcCost_); + } + + Logger::Event("GraphTransformationsFinished"); + + return result; +} + +void SchedRegion::CalculateUpperBounds(bool BbSchedulerEnabled) { + IsUpperBoundSet_ = true; + CmputAbslutUprBound_(); +} + +void SchedRegion::CalculateLowerBounds(bool BbSchedulerEnabled) { + IsLowerBoundSet_ = true; + schedLwrBound_ = dataDepGraph_->GetSchedLwrBound(); + if (!BbSchedulerEnabled) + CmputAndSetCostLwrBound(); + else + CmputLwrBounds_(false); +} diff --git a/lib/Scheduler/utilities.cpp b/lib/Scheduler/utilities.cpp index eb3c109d..2ea7bc0e 100644 --- a/lib/Scheduler/utilities.cpp +++ b/lib/Scheduler/utilities.cpp @@ -3,5 +3,5 @@ using namespace llvm::opt_sched; -std::chrono::high_resolution_clock::time_point Utilities::startTime = - std::chrono::high_resolution_clock::now(); +std::chrono::steady_clock::time_point Utilities::startTime = + std::chrono::steady_clock::now(); diff --git a/lib/Wrapper/AMDGPU/OptSchedGCNTarget.cpp b/lib/Wrapper/AMDGPU/OptSchedGCNTarget.cpp index de934477..aa6be72d 100644 --- a/lib/Wrapper/AMDGPU/OptSchedGCNTarget.cpp +++ b/lib/Wrapper/AMDGPU/OptSchedGCNTarget.cpp @@ -161,6 +161,8 @@ void OptSchedGCNTarget::initRegion(llvm::ScheduleDAGInstrs *DAG_, TargetOccupancy = shouldLimitWaves() ? MFI->getMinAllowedOccupancy() : MFI->getOccupancy(); + Logger::Event("TargetOccupancy", "region", RegionStartingOccupancy, "target", + TargetOccupancy); LLVM_DEBUG(dbgs() << "Region starting occupancy is " << RegionStartingOccupancy << "\n" << "Target occupancy is " << TargetOccupancy << "\n"); diff --git a/lib/Wrapper/OptimizingScheduler.cpp b/lib/Wrapper/OptimizingScheduler.cpp index 49ea6d79..c6eea6b3 100644 --- a/lib/Wrapper/OptimizingScheduler.cpp +++ b/lib/Wrapper/OptimizingScheduler.cpp @@ -13,6 +13,7 @@ #include "opt-sched/Scheduler/data_dep.h" #include "opt-sched/Scheduler/graph_trans.h" #include "opt-sched/Scheduler/graph_trans_ilp.h" +#include "opt-sched/Scheduler/graph_trans_ilp_occupancy_preserving.h" #include "opt-sched/Scheduler/random.h" #include "opt-sched/Scheduler/register.h" #include "opt-sched/Scheduler/sched_region.h" @@ -198,6 +199,12 @@ void ScheduleDAGOptSched::addGraphTransformations( GraphTransformations->push_back( llvm::make_unique(BDDG)); } + + if (OccupancyPreservingILPStaticNodeSup || + (OccupancyPreservingILPStaticNodeSup2ndPass && SecondPass)) { + GraphTransformations->push_back( + llvm::make_unique(BDDG)); + } } ScheduleDAGOptSched::ScheduleDAGOptSched( @@ -296,7 +303,8 @@ void ScheduleDAGOptSched::schedule() { return; } - if (!OptSchedEnabled || !scheduleSpecificRegion(RegionName, schedIni)) { + if (!OptSchedEnabled || !scheduleSpecificRegion(RegionName, schedIni) || + NumRegionInstrs > MaxRegionInstrs) { LLVM_DEBUG(dbgs() << "Skipping region " << RegionName << "\n"); ScheduleDAGMILive::schedule(); return; @@ -427,7 +435,8 @@ void ScheduleDAGOptSched::schedule() { auto region = llvm::make_unique( OST.get(), static_cast(DDG.get()), 0, HistTableHashBits, LowerBoundAlgorithm, HeuristicPriorities, EnumPriorities, VerifySchedule, - PruningStrategy, SchedForRPOnly, EnumStalls, SCW, SCF, HeurSchedType); + PruningStrategy, SchedForRPOnly, EnumStalls, SCW, SCF, HeurSchedType, + SecondPass ? GraphTransPosition2ndPass : GraphTransPosition); bool IsEasy = false; InstCount NormBestCost = 0; @@ -465,7 +474,7 @@ void ScheduleDAGOptSched::schedule() { } // Setup time before scheduling - Utilities::startTime = std::chrono::high_resolution_clock::now(); + Utilities::startTime = std::chrono::steady_clock::now(); // Schedule region. Rslt = region->FindOptimalSchedule(CurrentRegionTimeout, CurrentLengthTimeout, IsEasy, NormBestCost, BestSchedLngth, @@ -597,13 +606,24 @@ void ScheduleDAGOptSched::loadOptSchedConfig() { LatencyPrecision = fetchLatencyPrecision(); TreatOrderAsDataDeps = schedIni.GetBool("TREAT_ORDER_DEPS_AS_DATA_DEPS"); + MaxRegionInstrs = + schedIni.GetInt("MAX_REGION_LENGTH", static_cast(-1)); + UseLLVMScheduler = false; // should we print spills for the current function OPTSCHED_gPrintSpills = shouldPrintSpills(); + GraphTransPosition = + parseGraphTransPosition(schedIni.GetString("GT_POSITION")); + GraphTransPosition2ndPass = + parseGraphTransPosition(schedIni.GetString("2ND_PASS_GT_POSITION")); StaticNodeSup = schedIni.GetBool("STATIC_NODE_SUPERIORITY", false); MultiPassStaticNodeSup = schedIni.GetBool("MULTI_PASS_NODE_SUPERIORITY", false); ILPStaticNodeSup = schedIni.GetBool("STATIC_NODE_SUPERIORITY_ILP", false); + OccupancyPreservingILPStaticNodeSup = + schedIni.GetBool("STATIC_NODE_SUPERIORITY_ILP_PRESERVE_OCCUPANCY", false); + OccupancyPreservingILPStaticNodeSup2ndPass = schedIni.GetBool( + "2ND_PASS_ILP_NODE_SUPERIORITY_PRESERVING_OCCUPANCY", false); // setup pruning PruningStrategy.rlxd = schedIni.GetBool("APPLY_RELAXED_PRUNING"); PruningStrategy.nodeSup = schedIni.GetBool("DYNAMIC_NODE_SUPERIORITY"); @@ -723,6 +743,33 @@ static LISTSCHED_HEURISTIC GetNextHeuristicName(const std::string &Str, llvm::report_fatal_error("Unrecognized heuristic used: " + Str, false); } +GT_POSITION +ScheduleDAGOptSched::parseGraphTransPosition(const llvm::StringRef Str) { + GT_POSITION result = GT_POSITION::NONE; + + llvm::StringRef Cur = Str; + + do { + auto NextRest = Cur.split('_'); + const llvm::StringRef Next = NextRest.first; + Cur = NextRest.second; + + if (Next.empty()) + break; + + if (Next == "AH") + result |= GT_POSITION::AFTER_HEURISTIC; + else if (Next == "BH") + result |= GT_POSITION::BEFORE_HEURISTIC; + else + llvm::report_fatal_error("Unrecognized option for GT_POSITION setting: " + + Next.str() + " out of " + Str.str(), + false); + } while (true); + + return result; +} + SchedPriorities ScheduleDAGOptSched::parseHeuristic(const std::string &Str) { SchedPriorities Priorities; size_t StartIndex = 0; diff --git a/lib/Wrapper/OptimizingScheduler.h b/lib/Wrapper/OptimizingScheduler.h index 8b830f15..0bacea58 100644 --- a/lib/Wrapper/OptimizingScheduler.h +++ b/lib/Wrapper/OptimizingScheduler.h @@ -16,6 +16,7 @@ #include "opt-sched/Scheduler/sched_region.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/Support/Debug.h" #include @@ -148,6 +149,10 @@ class ScheduleDAGOptSched : public ScheduleDAGMILive { // timout per block bool IsTimeoutPerInst; + // The maximum number of instructions to schedule with our scheduler. + // Beyond that, it uses the heuristic scheduler. + unsigned MaxRegionInstrs; + // The maximum number of instructions that a block can contain to be // Treat data dependencies of type ORDER as data dependencies bool TreatOrderAsDataDeps; @@ -198,12 +203,21 @@ class ScheduleDAGOptSched : public ScheduleDAGMILive { // scheduling approach. SchedPriorities SecondPassEnumPriorities; + GT_POSITION GraphTransPosition = GT_POSITION::NONE; + GT_POSITION GraphTransPosition2ndPass = GT_POSITION::NONE; + // Static node superiority RP only graph transformation. bool StaticNodeSup; // ILP Static Node Superiority graph transformation bool ILPStaticNodeSup; + // Occupancy-preserving ILP Static Node Superiority graph transformation + bool OccupancyPreservingILPStaticNodeSup; + + // Occupancy-preserving ILP Static Node Superiority graph transformation + bool OccupancyPreservingILPStaticNodeSup2ndPass; + // Run multiple passes of the static node superiority algorithm // (StaticNodeSup must be enabled). bool MultiPassStaticNodeSup; @@ -226,6 +240,9 @@ class ScheduleDAGOptSched : public ScheduleDAGMILive { // Get spill cost function SPILL_COST_FUNCTION parseSpillCostFunc() const; + // Get the GT_POSITION + static GT_POSITION parseGraphTransPosition(llvm::StringRef Str); + // Return true if the OptScheduler should be enabled for the function this // ScheduleDAG was created for bool isOptSchedEnabled() const;