Skip to content

Commit fe3bc1b

Browse files
committed
[llvm-mca] Add extra counters for move elimination in view RegisterFileStatistics.
This patch teaches view RegisterFileStatistics how to report events for optimizable register moves. For each processor register file, view RegisterFileStatistics now reports the following extra information: (1) the number of optimizable register moves; (2) the number of register moves eliminated; (3) the number of zero moves (i.e. register moves that propagate a zero); and (4) the maximum number of moves eliminated per cycle.
Differential Revision: https://reviews.llvm.org/D53976
llvm-svn: 345865
1 parent eb56894 commit fe3bc1b

File tree

12 files changed

+183
-47
lines changed

12 files changed

+183
-47
lines changed

llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-1.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ vaddps %xmm1, %xmm1, %xmm2
3939
# CHECK-NEXT: Number of physical registers: 72
4040
# CHECK-NEXT: Total number of mappings created: 3
4141
# CHECK-NEXT: Max number of mappings used: 3
42+
# CHECK-NEXT: Number of optimizable moves: 3
43+
# CHECK-NEXT: Number of moves eliminated: 3 (100.0%)
44+
# CHECK-NEXT: Number of zero moves: 3 (100.0%)
45+
# CHECK-NEXT: Max moves eliminated per cycle: 1
4246

4347
# CHECK: * Register File #2 -- JIntegerPRF:
4448
# CHECK-NEXT: Number of physical registers: 64

llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-2.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ movdqu %xmm5, %xmm0
4949
# CHECK-NEXT: Number of physical registers: 72
5050
# CHECK-NEXT: Total number of mappings created: 0
5151
# CHECK-NEXT: Max number of mappings used: 0
52+
# CHECK-NEXT: Number of optimizable moves: 21
53+
# CHECK-NEXT: Number of moves eliminated: 21 (100.0%)
54+
# CHECK-NEXT: Number of zero moves: 21 (100.0%)
55+
# CHECK-NEXT: Max moves eliminated per cycle: 2
5256

5357
# CHECK: * Register File #2 -- JIntegerPRF:
5458
# CHECK-NEXT: Number of physical registers: 64

llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-3.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ vmovdqu %xmm5, %xmm0
4444
# CHECK-NEXT: Number of physical registers: 72
4545
# CHECK-NEXT: Total number of mappings created: 0
4646
# CHECK-NEXT: Max number of mappings used: 0
47+
# CHECK-NEXT: Number of optimizable moves: 18
48+
# CHECK-NEXT: Number of moves eliminated: 18 (100.0%)
49+
# CHECK-NEXT: Number of zero moves: 18 (100.0%)
50+
# CHECK-NEXT: Max moves eliminated per cycle: 2
4751

4852
# CHECK: * Register File #2 -- JIntegerPRF:
4953
# CHECK-NEXT: Number of physical registers: 64

llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-4.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ mov %edx, %eax
4545
# CHECK-NEXT: Number of physical registers: 64
4646
# CHECK-NEXT: Total number of mappings created: 0
4747
# CHECK-NEXT: Max number of mappings used: 0
48+
# CHECK-NEXT: Number of optimizable moves: 12
49+
# CHECK-NEXT: Number of moves eliminated: 12 (100.0%)
50+
# CHECK-NEXT: Number of zero moves: 12 (100.0%)
51+
# CHECK-NEXT: Max moves eliminated per cycle: 2
4852

4953
# CHECK: Resources:
5054
# CHECK-NEXT: [0] - JALU0

llvm/test/tools/llvm-mca/X86/BtVer2/reg-move-elimination-5.s

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ mov %rdx, %rax
4545
# CHECK-NEXT: Number of physical registers: 64
4646
# CHECK-NEXT: Total number of mappings created: 0
4747
# CHECK-NEXT: Max number of mappings used: 0
48+
# CHECK-NEXT: Number of optimizable moves: 12
49+
# CHECK-NEXT: Number of moves eliminated: 12 (100.0%)
50+
# CHECK-NEXT: Number of zero moves: 12 (100.0%)
51+
# CHECK-NEXT: Max moves eliminated per cycle: 2
4852

4953
# CHECK: Resources:
5054
# CHECK-NEXT: [0] - JALU0

llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp

Lines changed: 77 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ namespace mca {
2121
RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
2222
: STI(sti) {
2323
const MCSchedModel &SM = STI.getSchedModel();
24-
RegisterFileUsage Empty = {0, 0, 0};
24+
RegisterFileUsage RFUEmpty = {0, 0, 0};
25+
MoveEliminationInfo MEIEmpty = {0, 0, 0, 0, 0};
2526
if (!SM.hasExtraProcessorInfo()) {
2627
// Assume a single register file.
27-
RegisterFiles.emplace_back(Empty);
28+
PRFUsage.emplace_back(RFUEmpty);
29+
MoveElimInfo.emplace_back(MEIEmpty);
2830
return;
2931
}
3032

@@ -35,8 +37,42 @@ RegisterFileStatistics::RegisterFileStatistics(const MCSubtargetInfo &sti)
3537
// be skipped. If there are no user defined register files, then reserve a
3638
// single entry for the default register file at index #0.
3739
unsigned NumRegFiles = std::max(PI.NumRegisterFiles, 1U);
38-
RegisterFiles.resize(NumRegFiles);
39-
std::fill(RegisterFiles.begin(), RegisterFiles.end(), Empty);
40+
41+
PRFUsage.resize(NumRegFiles);
42+
std::fill(PRFUsage.begin(), PRFUsage.end(), RFUEmpty);
43+
44+
MoveElimInfo.resize(NumRegFiles);
45+
std::fill(MoveElimInfo.begin(), MoveElimInfo.end(), MEIEmpty);
46+
}
47+
48+
void RegisterFileStatistics::updateRegisterFileUsage(
49+
ArrayRef<unsigned> UsedPhysRegs) {
50+
for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I) {
51+
RegisterFileUsage &RFU = PRFUsage[I];
52+
unsigned NumUsedPhysRegs = UsedPhysRegs[I];
53+
RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
54+
RFU.TotalMappings += NumUsedPhysRegs;
55+
RFU.MaxUsedMappings =
56+
std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
57+
}
58+
}
59+
60+
void RegisterFileStatistics::updateMoveElimInfo(const Instruction &Inst) {
61+
if (!Inst.isOptimizableMove())
62+
return;
63+
64+
assert(Inst.getDefs().size() == 1 && "Expected a single definition!");
65+
assert(Inst.getUses().size() == 1 && "Expected a single register use!");
66+
const WriteState &WS = Inst.getDefs()[0];
67+
const ReadState &RS = Inst.getUses()[0];
68+
69+
MoveEliminationInfo &Info =
70+
MoveElimInfo[Inst.getDefs()[0].getRegisterFileID()];
71+
Info.TotalMoveEliminationCandidates++;
72+
if (WS.isEliminated())
73+
Info.CurrentMovesEliminated++;
74+
if (WS.isWriteZero() && RS.isReadZero())
75+
Info.TotalMovesThatPropagateZero++;
4076
}
4177

4278
void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
@@ -45,21 +81,24 @@ void RegisterFileStatistics::onEvent(const HWInstructionEvent &Event) {
4581
break;
4682
case HWInstructionEvent::Retired: {
4783
const auto &RE = static_cast<const HWInstructionRetiredEvent &>(Event);
48-
for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I)
49-
RegisterFiles[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
84+
for (unsigned I = 0, E = PRFUsage.size(); I < E; ++I)
85+
PRFUsage[I].CurrentlyUsedMappings -= RE.FreedPhysRegs[I];
5086
break;
5187
}
5288
case HWInstructionEvent::Dispatched: {
5389
const auto &DE = static_cast<const HWInstructionDispatchedEvent &>(Event);
54-
for (unsigned I = 0, E = RegisterFiles.size(); I < E; ++I) {
55-
RegisterFileUsage &RFU = RegisterFiles[I];
56-
unsigned NumUsedPhysRegs = DE.UsedPhysRegs[I];
57-
RFU.CurrentlyUsedMappings += NumUsedPhysRegs;
58-
RFU.TotalMappings += NumUsedPhysRegs;
59-
RFU.MaxUsedMappings =
60-
std::max(RFU.MaxUsedMappings, RFU.CurrentlyUsedMappings);
61-
}
90+
updateRegisterFileUsage(DE.UsedPhysRegs);
91+
updateMoveElimInfo(*DE.IR.getInstruction());
92+
}
6293
}
94+
}
95+
96+
void RegisterFileStatistics::onCycleEnd() {
97+
for (MoveEliminationInfo &MEI : MoveElimInfo) {
98+
unsigned &CurrentMax = MEI.MaxMovesEliminatedPerCycle;
99+
CurrentMax = std::max(CurrentMax, MEI.CurrentMovesEliminated);
100+
MEI.TotalMovesEliminated += MEI.CurrentMovesEliminated;
101+
MEI.CurrentMovesEliminated = 0;
63102
}
64103
}
65104

@@ -68,14 +107,14 @@ void RegisterFileStatistics::printView(raw_ostream &OS) const {
68107
raw_string_ostream TempStream(Buffer);
69108

70109
TempStream << "\n\nRegister File statistics:";
71-
const RegisterFileUsage &GlobalUsage = RegisterFiles[0];
110+
const RegisterFileUsage &GlobalUsage = PRFUsage[0];
72111
TempStream << "\nTotal number of mappings created: "
73112
<< GlobalUsage.TotalMappings;
74113
TempStream << "\nMax number of mappings used: "
75114
<< GlobalUsage.MaxUsedMappings << '\n';
76115

77-
for (unsigned I = 1, E = RegisterFiles.size(); I < E; ++I) {
78-
const RegisterFileUsage &RFU = RegisterFiles[I];
116+
for (unsigned I = 1, E = PRFUsage.size(); I < E; ++I) {
117+
const RegisterFileUsage &RFU = PRFUsage[I];
79118
// Obtain the register file descriptor from the scheduling model.
80119
assert(STI.getSchedModel().hasExtraProcessorInfo() &&
81120
"Unable to find register file info!");
@@ -98,6 +137,27 @@ void RegisterFileStatistics::printView(raw_ostream &OS) const {
98137
<< RFU.TotalMappings;
99138
TempStream << "\n Max number of mappings used: "
100139
<< RFU.MaxUsedMappings << '\n';
140+
const MoveEliminationInfo &MEI = MoveElimInfo[I];
141+
142+
if (MEI.TotalMoveEliminationCandidates) {
143+
TempStream << " Number of optimizable moves: "
144+
<< MEI.TotalMoveEliminationCandidates;
145+
double EliminatedMovProportion = (double)MEI.TotalMovesEliminated /
146+
MEI.TotalMoveEliminationCandidates *
147+
100.0;
148+
double ZeroMovProportion = (double)MEI.TotalMovesThatPropagateZero /
149+
MEI.TotalMoveEliminationCandidates * 100.0;
150+
TempStream << "\n Number of moves eliminated: "
151+
<< MEI.TotalMovesEliminated << " "
152+
<< format("(%.1f%%)",
153+
floor((EliminatedMovProportion * 10) + 0.5) / 10);
154+
TempStream << "\n Number of zero moves: "
155+
<< MEI.TotalMovesThatPropagateZero << " "
156+
<< format("(%.1f%%)",
157+
floor((ZeroMovProportion * 10) + 0.5) / 10);
158+
TempStream << "\n Max moves eliminated per cycle: "
159+
<< MEI.MaxMovesEliminatedPerCycle << '\n';
160+
}
101161
}
102162

103163
TempStream.flush();

llvm/tools/llvm-mca/Views/RegisterFileStatistics.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
/// Number of physical registers: 72
2222
/// Total number of mappings created: 0
2323
/// Max number of mappings used: 0
24+
/// Number of optimizable moves: 200
25+
/// Number of moves eliminated: 200 (100.0%)
26+
/// Number of zero moves: 200 (100.0%)
27+
/// Max moves eliminated per cycle: 2
2428
///
2529
/// * Register File #2 -- IntegerPRF:
2630
/// Number of physical registers: 64
@@ -49,12 +53,25 @@ class RegisterFileStatistics : public View {
4953
unsigned CurrentlyUsedMappings;
5054
};
5155

56+
struct MoveEliminationInfo {
57+
unsigned TotalMoveEliminationCandidates;
58+
unsigned TotalMovesEliminated;
59+
unsigned TotalMovesThatPropagateZero;
60+
unsigned MaxMovesEliminatedPerCycle;
61+
unsigned CurrentMovesEliminated;
62+
};
63+
5264
// There is one entry for each register file implemented by the processor.
53-
llvm::SmallVector<RegisterFileUsage, 4> RegisterFiles;
65+
llvm::SmallVector<RegisterFileUsage, 4> PRFUsage;
66+
llvm::SmallVector<MoveEliminationInfo, 4> MoveElimInfo;
67+
68+
void updateRegisterFileUsage(ArrayRef<unsigned> UsedPhysRegs);
69+
void updateMoveElimInfo(const Instruction &Inst);
5470

5571
public:
5672
RegisterFileStatistics(const llvm::MCSubtargetInfo &sti);
5773

74+
void onCycleEnd() override;
5875
void onEvent(const HWInstructionEvent &Event) override;
5976
void printView(llvm::raw_ostream &OS) const override;
6077
};

llvm/tools/llvm-mca/include/HardwareUnits/RegisterFile.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ class RegisterFile : public HardwareUnit {
173173
void freePhysRegs(const RegisterRenamingInfo &Entry,
174174
MutableArrayRef<unsigned> FreedPhysRegs);
175175

176+
// Collects writes that are in a RAW dependency with RS.
177+
// This method is called from `addRegisterRead()`.
178+
void collectWrites(const ReadState &RS,
179+
SmallVectorImpl<WriteRef> &Writes) const;
180+
176181
// Create an instance of RegisterMappingTracker for every register file
177182
// specified by the processor model.
178183
// If no register file is specified, then this method creates a default
@@ -189,6 +194,10 @@ class RegisterFile : public HardwareUnit {
189194
// No physical register is allocated if this write is from a zero-idiom.
190195
void addRegisterWrite(WriteRef Write, MutableArrayRef<unsigned> UsedPhysRegs);
191196

197+
// Collect writes that are in a data dependency with RS, and update RS
198+
// internal state.
199+
void addRegisterRead(ReadState &RS, SmallVectorImpl<WriteRef> &Writes) const;
200+
192201
// Removes write \param WS from the register mappings.
193202
// Physical registers may be released to reflect this update.
194203
// No registers are released if this write is from a zero-idiom.
@@ -200,7 +209,7 @@ class RegisterFile : public HardwareUnit {
200209
// If RS is a read from a zero register, and WS is eliminated, then
201210
// `WS.WritesZero` is also set, so that method addRegisterWrite() would not
202211
// reserve a physical register for it.
203-
bool tryEliminateMove(WriteState &WS, const ReadState &RS);
212+
bool tryEliminateMove(WriteState &WS, ReadState &RS);
204213

205214
// Checks if there are enough physical registers in the register files.
206215
// Returns a "response mask" where each bit represents the response from a
@@ -212,7 +221,8 @@ class RegisterFile : public HardwareUnit {
212221
// Current implementation can simulate up to 32 register files (including the
213222
// special register file at index #0).
214223
unsigned isAvailable(ArrayRef<unsigned> Regs) const;
215-
void collectWrites(SmallVectorImpl<WriteRef> &Writes, unsigned RegID) const;
224+
225+
// Returns the number of PRFs implemented by this processor.
216226
unsigned getNumRegisterFiles() const { return RegisterFiles.size(); }
217227

218228
// Notify each PRF that a new cycle just started.

llvm/tools/llvm-mca/include/Instruction.h

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,9 @@ class WriteState {
101101
// field RegisterID from WD.
102102
unsigned RegisterID;
103103

104+
// Physical register file that serves register RegisterID.
105+
unsigned PRFID;
106+
104107
// True if this write implicitly clears the upper portion of RegisterID's
105108
// super-registers.
106109
bool ClearsSuperRegs;
@@ -135,7 +138,7 @@ class WriteState {
135138
WriteState(const WriteDescriptor &Desc, unsigned RegID,
136139
bool clearsSuperRegs = false, bool writesZero = false)
137140
: WD(&Desc), CyclesLeft(UNKNOWN_CYCLES), RegisterID(RegID),
138-
ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
141+
PRFID(0), ClearsSuperRegs(clearsSuperRegs), WritesZero(writesZero),
139142
IsEliminated(false), DependentWrite(nullptr), NumWriteUsers(0U) {}
140143

141144
WriteState(const WriteState &Other) = default;
@@ -144,6 +147,7 @@ class WriteState {
144147
int getCyclesLeft() const { return CyclesLeft; }
145148
unsigned getWriteResourceID() const { return WD->SClassOrWriteResourceID; }
146149
unsigned getRegisterID() const { return RegisterID; }
150+
unsigned getRegisterFileID() const { return PRFID; }
147151
unsigned getLatency() const { return WD->Latency; }
148152

149153
void addUser(ReadState *Use, int ReadAdvance);
@@ -168,6 +172,8 @@ class WriteState {
168172
IsEliminated = true;
169173
}
170174

175+
void setPRF(unsigned PRF) { PRFID = PRF; }
176+
171177
// On every cycle, update CyclesLeft and notify dependent users.
172178
void cycleEvent();
173179
void onInstructionIssued();
@@ -185,6 +191,8 @@ class ReadState {
185191
const ReadDescriptor *RD;
186192
// Physical register identifier associated with this read.
187193
unsigned RegisterID;
194+
// Physical register file that serves register RegisterID.
195+
unsigned PRFID;
188196
// Number of writes that contribute to the definition of RegisterID.
189197
// In the absence of partial register updates, the number of DependentWrites
190198
// cannot be more than one.
@@ -201,18 +209,21 @@ class ReadState {
201209
// This field is set to true only if there are no dependent writes, and
202210
// there are no `CyclesLeft' to wait.
203211
bool IsReady;
212+
// True if this is a read from a known zero register.
213+
bool IsZero;
204214
// True if this register read is from a dependency-breaking instruction.
205215
bool IndependentFromDef;
206216

207217
public:
208218
ReadState(const ReadDescriptor &Desc, unsigned RegID)
209-
: RD(&Desc), RegisterID(RegID), DependentWrites(0),
219+
: RD(&Desc), RegisterID(RegID), PRFID(0), DependentWrites(0),
210220
CyclesLeft(UNKNOWN_CYCLES), TotalCycles(0), IsReady(true),
211-
IndependentFromDef(false) {}
221+
IsZero(false), IndependentFromDef(false) {}
212222

213223
const ReadDescriptor &getDescriptor() const { return *RD; }
214224
unsigned getSchedClass() const { return RD->SchedClassID; }
215225
unsigned getRegisterID() const { return RegisterID; }
226+
unsigned getRegisterFileID() const { return PRFID; }
216227

217228
bool isReady() const { return IsReady; }
218229
bool isImplicitRead() const { return RD->isImplicitRead(); }
@@ -226,6 +237,10 @@ class ReadState {
226237
DependentWrites = Writes;
227238
IsReady = !Writes;
228239
}
240+
241+
bool isReadZero() const { return IsZero; }
242+
void setReadZero() { IsZero = true; }
243+
void setPRF(unsigned ID) { PRFID = ID; }
229244
};
230245

231246
/// A sequence of cycles.

llvm/tools/llvm-mca/include/Stages/DispatchStage.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,6 @@ class DispatchStage final : public Stage {
6868
ArrayRef<unsigned> UsedPhysRegs,
6969
unsigned uOps) const;
7070

71-
void collectWrites(SmallVectorImpl<WriteRef> &Vec, unsigned RegID) const {
72-
return PRF.collectWrites(Vec, RegID);
73-
}
74-
7571
public:
7672
DispatchStage(const MCSubtargetInfo &Subtarget, const MCRegisterInfo &MRI,
7773
unsigned MaxDispatchWidth, RetireControlUnit &R,

0 commit comments

Comments
 (0)