Skip to content

Commit 40884ab

Browse files
vporpo authored and DhruvSrivastavaX committed
[SandboxVec] Add a simple pack reuse pass (llvm#141848)
This patch implements a simple pass that tries to de-duplicate packs. If there are two packing patterns inserting the exact same values in the exact same order, then we keep only the top-most one of them. Even though such patterns may be optimized away by subsequent passes, it is still useful to do this within the vectorizer because otherwise the cost estimation may be off, making the vectorizer overly conservative.
1 parent ca11e10 commit 40884ab

File tree

9 files changed

+344
-2
lines changed

9 files changed

+344
-2
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//===- PackReuse.h --------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A pack de-duplication pass.
10+
//
11+
12+
#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H
13+
#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H
14+
15+
#include "llvm/ADT/StringRef.h"
16+
#include "llvm/SandboxIR/Pass.h"
17+
#include "llvm/SandboxIR/Region.h"
18+
19+
namespace llvm::sandboxir {
20+
21+
/// This pass aims at de-duplicating packs, i.e., try to reuse already existing
22+
/// pack patterns instead of keeping both.
23+
/// This is useful because even though the duplicates will most probably be
24+
/// optimized away by future passes, their added cost can make vectorization
25+
/// more conservative than it should be.
26+
class PackReuse final : public RegionPass {
27+
bool Change = false;
28+
29+
public:
30+
PackReuse() : RegionPass("pack-reuse") {}
31+
bool runOnRegion(Region &Rgn, const Analyses &A) final;
32+
};
33+
34+
} // namespace llvm::sandboxir
35+
36+
#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_PACKREUSE_H

llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,25 @@
1717
#include "llvm/SandboxIR/Type.h"
1818
#include "llvm/SandboxIR/Utils.h"
1919

20-
namespace llvm::sandboxir {
20+
namespace llvm {
21+
/// Traits for DenseMap.
22+
template <> struct DenseMapInfo<SmallVector<sandboxir::Value *>> {
23+
static inline SmallVector<sandboxir::Value *> getEmptyKey() {
24+
return SmallVector<sandboxir::Value *>({(sandboxir::Value *)-1});
25+
}
26+
static inline SmallVector<sandboxir::Value *> getTombstoneKey() {
27+
return SmallVector<sandboxir::Value *>({(sandboxir::Value *)-2});
28+
}
29+
static unsigned getHashValue(const SmallVector<sandboxir::Value *> &Vec) {
30+
return hash_combine_range(Vec.begin(), Vec.end());
31+
}
32+
static bool isEqual(const SmallVector<sandboxir::Value *> &Vec1,
33+
const SmallVector<sandboxir::Value *> &Vec2) {
34+
return Vec1 == Vec2;
35+
}
36+
};
37+
38+
namespace sandboxir {
2139

2240
class VecUtils {
2341
public:
@@ -179,13 +197,79 @@ class VecUtils {
179197
/// \Returns the first integer power of 2 that is <= Num.
180198
static unsigned getFloorPowerOf2(unsigned Num);
181199

200+
/// Helper struct for `matchPack()`. Describes the instructions and operands
201+
/// of a pack pattern.
202+
struct PackPattern {
203+
/// The insertelement instructions that form the pack pattern in bottom-up
204+
/// order, i.e., the first instruction in `Instrs` is the bottom-most
205+
/// InsertElement instruction of the pack pattern.
206+
/// For example in this simple pack pattern:
207+
/// %Pack0 = insertelement <2 x i8> poison, i8 %v0, i64 0
208+
/// %Pack1 = insertelement <2 x i8> %Pack0, i8 %v1, i64 1
209+
/// this is [ %Pack1, %Pack0 ].
210+
SmallVector<Instruction *> Instrs;
211+
/// The "external" operands of the pack pattern, i.e., the values that get
212+
/// packed into a vector, skipping the ones in `Instrs`. The operands are in
213+
/// bottom-up order, starting from the operands of the bottom-most insert.
214+
/// So in our example this would be [ %v1, %v0 ].
215+
SmallVector<Value *> Operands;
216+
};
217+
218+
/// If \p I is the last instruction of a pack pattern (i.e., an InsertElement
219+
/// into a vector), then this function returns the instructions in the pack
220+
/// and the operands in the pack, else returns nullopt.
221+
/// Here is an example of a matched pattern:
222+
/// %PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0
223+
/// %PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1
224+
/// TODO: this currently detects only simple canonicalized patterns.
225+
static std::optional<PackPattern> matchPack(Instruction *I) {
226+
// TODO: Support vector pack patterns.
227+
// TODO: Support out-of-order inserts.
228+
229+
// Early return if `I` is not an Insert.
230+
if (!isa<InsertElementInst>(I))
231+
return std::nullopt;
232+
auto *BB0 = I->getParent();
233+
// The pack contains as many instrs as the lanes of the bottom-most Insert
234+
unsigned ExpectedNumInserts = VecUtils::getNumLanes(I);
235+
assert(ExpectedNumInserts >= 2 && "Expected at least 2 inserts!");
236+
PackPattern Pack;
237+
Pack.Operands.resize(ExpectedNumInserts);
238+
// Collect the inserts by walking up the use-def chain.
239+
Instruction *InsertI = I;
240+
for (auto ExpectedLane : reverse(seq<unsigned>(ExpectedNumInserts))) {
241+
if (InsertI == nullptr)
242+
return std::nullopt;
243+
if (InsertI->getParent() != BB0)
244+
return std::nullopt;
245+
// Check the lane.
246+
auto *LaneC = dyn_cast<ConstantInt>(InsertI->getOperand(2));
247+
if (LaneC == nullptr || LaneC->getSExtValue() != ExpectedLane)
248+
return std::nullopt;
249+
Pack.Instrs.push_back(InsertI);
250+
Pack.Operands[ExpectedLane] = InsertI->getOperand(1);
251+
252+
Value *Op = InsertI->getOperand(0);
253+
if (ExpectedLane == 0) {
254+
// Check the topmost insert. The operand should be a Poison.
255+
if (!isa<PoisonValue>(Op))
256+
return std::nullopt;
257+
} else {
258+
InsertI = dyn_cast<InsertElementInst>(Op);
259+
}
260+
}
261+
return Pack;
262+
}
263+
182264
#ifndef NDEBUG
183265
/// Helper dump function for debugging.
184266
LLVM_DUMP_METHOD static void dump(ArrayRef<Value *> Bndl);
185267
LLVM_DUMP_METHOD static void dump(ArrayRef<Instruction *> Bndl);
186268
#endif // NDEBUG
187269
};
188270

189-
} // namespace llvm::sandboxir
271+
} // namespace sandboxir
272+
273+
} // namespace llvm
190274

191275
#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H

llvm/lib/Transforms/Vectorize/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ add_llvm_component_library(LLVMVectorize
99
SandboxVectorizer/Interval.cpp
1010
SandboxVectorizer/Legality.cpp
1111
SandboxVectorizer/Passes/BottomUpVec.cpp
12+
SandboxVectorizer/Passes/PackReuse.cpp
1213
SandboxVectorizer/Passes/RegionsFromBBs.cpp
1314
SandboxVectorizer/Passes/RegionsFromMetadata.cpp
1415
SandboxVectorizer/Passes/SeedCollection.cpp
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//===- PackReuse.cpp - A pack de-duplication pass -------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h"
10+
#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
11+
12+
namespace llvm::sandboxir {
13+
14+
/// Looks for duplicate pack patterns in \p Rgn and rewires the users of each
/// duplicate to the top-most (in program order) equivalent pack.
/// Two packs are duplicates if they are in the same block and pack the exact
/// same operands into the exact same lanes.
/// \Returns the `Change` flag, which is set whenever a duplicate is replaced.
bool PackReuse::runOnRegion(Region &Rgn, const Analyses &A) {
  if (Rgn.empty())
    return Change;
  // The key to the map is the block of the pack and its ordered operands.
  // The value is a vector of all Pack Instrs with the same key.
  DenseMap<std::pair<BasicBlock *, SmallVector<Value *>>,
           SmallVector<SmallVector<Instruction *>>>
      PacksMap;
  // Go over the region and look for pack patterns.
  for (auto *I : Rgn) {
    auto PackOpt = VecUtils::matchPack(I);
    if (!PackOpt)
      continue;
    // TODO: For now limit pack reuse within a BB.
    BasicBlock *BB = PackOpt->Instrs.front()->getParent();
    PacksMap[{BB, PackOpt->Operands}].push_back(PackOpt->Instrs);
  }
  for (auto &Pair : PacksMap) {
    auto &Packs = Pair.second;
    if (Packs.size() <= 1)
      continue;
    // Sort packs by program order of their bottom-most insert. All packs of
    // one map entry are in the same block, so `comesBefore()` is well defined.
    sort(Packs, [](const auto &PackInstrs1, const auto &PackInstrs2) {
      return PackInstrs1.front()->comesBefore(PackInstrs2.front());
    });
    Instruction *TopMostPack = Packs[0].front();
    // Replace duplicate packs with the first one.
    for (const auto &PackInstrs :
         make_range(std::next(Packs.begin()), Packs.end())) {
      PackInstrs.front()->replaceAllUsesWith(TopMostPack);
      // Delete the pack instrs bottom-up. The bottom-most insert is dead after
      // the replacement above, but an insert further up the chain may still
      // be used by instructions outside this pack (including another pack
      // that shares the same top inserts). Stop at the first insert that is
      // still in use: it and all the inserts above it must stay.
      for (auto *PackI : PackInstrs) {
        if (PackI->hasNUsesOrMore(1))
          break;
        PackI->eraseFromParent();
      }
    }
    Change = true;
  }
  return Change;
}
52+
53+
} // namespace llvm::sandboxir

llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#endif
1919

2020
REGION_PASS("null", ::llvm::sandboxir::NullPass)
21+
REGION_PASS("pack-reuse", ::llvm::sandboxir::PackReuse)
2122
REGION_PASS("print-instruction-count", ::llvm::sandboxir::PrintInstructionCount)
2223
REGION_PASS("print-region", ::llvm::sandboxir::PrintRegion)
2324
REGION_PASS("tr-save", ::llvm::sandboxir::TransactionSave)

llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h"
44
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h"
5+
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PackReuse.h"
56
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h"
67
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintRegion.h"
78
#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h"
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-passes="regions-from-metadata<pack-reuse>" %s -S | FileCheck %s
3+
4+
define void @pack_reuse(i8 %v0, i8 %v1, ptr %ptr) {
5+
; CHECK-LABEL: define void @pack_reuse(
6+
; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) {
7+
; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META0:![0-9]+]]
8+
; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META0]]
9+
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
10+
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
11+
; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META0]]
12+
; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META0]]
13+
; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META0]]
14+
; CHECK-NEXT: ret void
15+
;
16+
%PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
17+
%PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0
18+
store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0
19+
20+
; Should reuse PackA1.
21+
%PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
22+
%PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0
23+
store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0
24+
25+
; Should remain.
26+
%PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0
27+
%PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0
28+
store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0
29+
ret void
30+
}
31+
32+
; TODO: For now we don't support reusing packs from earlier BBs.
33+
define void @pack_cross_bb(i8 %v0, i8 %v1, ptr %ptr) {
34+
; CHECK-LABEL: define void @pack_cross_bb(
35+
; CHECK-SAME: i8 [[V0:%.*]], i8 [[V1:%.*]], ptr [[PTR:%.*]]) {
36+
; CHECK-NEXT: [[ENTRY:.*:]]
37+
; CHECK-NEXT: [[PACKA0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1:![0-9]+]]
38+
; CHECK-NEXT: [[PACKA1:%.*]] = insertelement <2 x i8> [[PACKA0]], i8 [[V1]], i64 1, !sandboxvec [[META1]]
39+
; CHECK-NEXT: store <2 x i8> [[PACKA1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
40+
; CHECK-NEXT: br label %[[BB:.*]]
41+
; CHECK: [[BB]]:
42+
; CHECK-NEXT: [[PACKB0:%.*]] = insertelement <2 x i8> poison, i8 [[V0]], i64 0, !sandboxvec [[META1]]
43+
; CHECK-NEXT: [[PACKB1:%.*]] = insertelement <2 x i8> [[PACKB0]], i8 [[V1]], i64 1, !sandboxvec [[META1]]
44+
; CHECK-NEXT: store <2 x i8> [[PACKB1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
45+
; CHECK-NEXT: [[PACKC0:%.*]] = insertelement <2 x i8> poison, i8 [[V1]], i64 0, !sandboxvec [[META1]]
46+
; CHECK-NEXT: [[PACKC1:%.*]] = insertelement <2 x i8> [[PACKC0]], i8 [[V0]], i64 1, !sandboxvec [[META1]]
47+
; CHECK-NEXT: store <2 x i8> [[PACKC1]], ptr [[PTR]], align 2, !sandboxvec [[META1]]
48+
; CHECK-NEXT: ret void
49+
;
50+
entry:
51+
%PackA0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
52+
%PackA1 = insertelement <2 x i8> %PackA0, i8 %v1, i64 1, !sandboxvec !0
53+
store <2 x i8> %PackA1, ptr %ptr, !sandboxvec !0
54+
br label %bb
55+
56+
bb:
57+
%PackB0 = insertelement <2 x i8> poison, i8 %v0, i64 0, !sandboxvec !0
58+
%PackB1 = insertelement <2 x i8> %PackB0, i8 %v1, i64 1, !sandboxvec !0
59+
store <2 x i8> %PackB1, ptr %ptr, !sandboxvec !0
60+
61+
%PackC0 = insertelement <2 x i8> poison, i8 %v1, i64 0, !sandboxvec !0
62+
%PackC1 = insertelement <2 x i8> %PackC0, i8 %v0, i64 1, !sandboxvec !0
63+
store <2 x i8> %PackC1, ptr %ptr, !sandboxvec !0
64+
ret void
65+
}
66+
67+
!0 = distinct !{!"sandboxregion"}
68+
;.
69+
; CHECK: [[META0]] = distinct !{!"sandboxregion"}
70+
; CHECK: [[META1]] = distinct !{!"sandboxregion"}
71+
;.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,tr-accept>" %s -S | FileCheck %s --check-prefix NOREUSE
3+
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<tr-save,bottom-up-vec,pack-reuse,tr-accept>" %s -S | FileCheck %s --check-prefix PKREUSE
4+
5+
define void @pack_reuse(ptr %ptr, ptr %ptrX, ptr %ptrY) {
6+
; NOREUSE-LABEL: define void @pack_reuse(
7+
; NOREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) {
8+
; NOREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
9+
; NOREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4
10+
; NOREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]]
11+
; NOREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]]
12+
; NOREUSE-NEXT: [[PACK:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0]]
13+
; NOREUSE-NEXT: [[PACK1:%.*]] = insertelement <2 x float> [[PACK]], float [[LDY]], i32 1, !sandboxvec [[META0]]
14+
; NOREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
15+
; NOREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK1]], [[PACK3]], !sandboxvec [[META0]]
16+
; NOREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
17+
; NOREUSE-NEXT: ret void
18+
;
19+
; PKREUSE-LABEL: define void @pack_reuse(
20+
; PKREUSE-SAME: ptr [[PTR:%.*]], ptr [[PTRX:%.*]], ptr [[PTRY:%.*]]) {
21+
; PKREUSE-NEXT: [[LDX:%.*]] = load float, ptr [[PTRX]], align 4
22+
; PKREUSE-NEXT: [[LDY:%.*]] = load float, ptr [[PTRY]], align 4
23+
; PKREUSE-NEXT: [[PACK2:%.*]] = insertelement <2 x float> poison, float [[LDX]], i32 0, !sandboxvec [[META0:![0-9]+]]
24+
; PKREUSE-NEXT: [[PACK3:%.*]] = insertelement <2 x float> [[PACK2]], float [[LDY]], i32 1, !sandboxvec [[META0]]
25+
; PKREUSE-NEXT: [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
26+
; PKREUSE-NEXT: [[VEC:%.*]] = fsub <2 x float> [[PACK3]], [[PACK3]], !sandboxvec [[META0]]
27+
; PKREUSE-NEXT: store <2 x float> [[VEC]], ptr [[PTR0]], align 4, !sandboxvec [[META0]]
28+
; PKREUSE-NEXT: ret void
29+
;
30+
%ldX = load float, ptr %ptrX
31+
%ldY = load float, ptr %ptrY
32+
33+
%ptr0 = getelementptr float, ptr %ptr, i32 0
34+
%ptr1 = getelementptr float, ptr %ptr, i32 1
35+
%sub0 = fsub float %ldX, %ldX
36+
%sub1 = fsub float %ldY, %ldY
37+
store float %sub0, ptr %ptr0
38+
store float %sub1, ptr %ptr1
39+
ret void
40+
}
41+
;.
42+
; NOREUSE: [[META0]] = distinct !{!"sandboxregion"}
43+
;.
44+
; PKREUSE: [[META0]] = distinct !{!"sandboxregion"}
45+
;.

0 commit comments

Comments
 (0)