diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ffea8d7ca4288..0b32170e4ed14 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1675,6 +1675,9 @@ void GCNPassConfig::addOptimizedRegAlloc() { // instructions that cause scheduling barriers. insertPass(&MachineSchedulerID, &SIWholeQuadModeID); + if (!LateWaveTransform && OptExecMaskPreRA) + insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID); + // This is not an essential optimization and it has a noticeable impact on // compilation time, so we only enable it from O2. if (TM->getOptLevel() > CodeGenOptLevel::Less) @@ -1853,13 +1856,14 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { // allocations. // addPass(&AMDGPUUpdateAllocatedVGPRLiveRangesID); + // Optimize EXEC-mask related instructions around SGPR register class. + if (OptExecMaskPreRA) + addPass(&SIOptimizeExecMaskingPreRAID); + // Now we can perform register-coalescing on remaining copies, // mainly sgpr copies and wwm-vgpr copies. addPass(&RegisterCoalescerID); } - - if (OptExecMaskPreRA) - addPass(&SIOptimizeExecMaskingPreRAID); addPass(createSGPRAllocPass(true)); diff --git a/llvm/test/CodeGen/AMDGPU/WaveTransform/wave-transform-llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/WaveTransform/wave-transform-llc-pipeline.ll index 381daed5baf95..a5707994c5a91 100644 --- a/llvm/test/CodeGen/AMDGPU/WaveTransform/wave-transform-llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/WaveTransform/wave-transform-llc-pipeline.ll @@ -385,9 +385,9 @@ ; GCN-O3-NEXT: AMDGPU Control Flow Wave Transform ; GCN-O3-NEXT: Slot index numbering ; GCN-O3-NEXT: Live Interval Analysis +; GCN-O3-NEXT: SI optimize exec mask operations pre-RA ; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Register Coalescer -; GCN-O3-NEXT: SI optimize exec mask operations pre-RA ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Debug Variable Analysis ; GCN-O3-NEXT: Live Stack Slot Analysis diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 78b64094f6e27..4e0aae2bc2454 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -352,8 +352,8 @@ ; GCN-O1-NEXT: Rewrite Partial Register Uses ; GCN-O1-NEXT: Machine Instruction Scheduler ; GCN-O1-NEXT: SI Whole Quad Mode -; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg ; GCN-O1-NEXT: SI optimize exec mask operations pre-RA +; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg ; GCN-O1-NEXT: Machine Natural Loop Construction ; GCN-O1-NEXT: Machine Block Frequency Analysis ; GCN-O1-NEXT: Debug Variable Analysis @@ -665,8 +665,8 @@ ; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler ; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations ; GCN-O1-OPTS-NEXT: SI Whole Quad Mode -; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg ; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA +; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction ; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis ; GCN-O1-OPTS-NEXT: Debug Variable Analysis @@ -983,9 +983,9 @@ ; GCN-O2-NEXT: Machine Instruction Scheduler ; GCN-O2-NEXT: AMDGPU Pre-RA optimizations ; GCN-O2-NEXT: SI Whole Quad Mode +; GCN-O2-NEXT: SI optimize exec mask operations pre-RA ; GCN-O2-NEXT: SI Form memory clauses ; GCN-O2-NEXT: AMDGPU Pre-RA Long Branch Reg -; GCN-O2-NEXT: SI optimize exec mask operations pre-RA ; GCN-O2-NEXT: Machine Natural Loop Construction ; GCN-O2-NEXT: Machine Block Frequency Analysis ; GCN-O2-NEXT: Debug Variable Analysis @@ -1315,9 +1315,9 @@ ; GCN-O3-NEXT: Machine Instruction Scheduler ; GCN-O3-NEXT: AMDGPU Pre-RA optimizations ; GCN-O3-NEXT: SI Whole Quad Mode +; GCN-O3-NEXT: SI optimize exec mask operations pre-RA ; GCN-O3-NEXT: SI Form memory clauses ; GCN-O3-NEXT: AMDGPU Pre-RA Long Branch Reg -; GCN-O3-NEXT: SI optimize exec mask operations pre-RA ; GCN-O3-NEXT: Machine Natural Loop Construction ; GCN-O3-NEXT: Machine Block Frequency Analysis ; GCN-O3-NEXT: Debug Variable Analysis