Skip to content

Commit acc7641

Browse files
committed
[APInt] Optimize umul_ov
Replace the two costly udiv() calls with lshr(1) * RHS, followed by a left shift and an addition. On one 64-bit umul_ov benchmark, I measured a clear improvement: 12.8129s -> 3.6257s. Note: there may be some value in special-casing 64-bit (the most common case) with __builtin_umulll_overflow(). Differential Revision: https://reviews.llvm.org/D60669 llvm-svn: 358730
1 parent 9206335 commit acc7641

File tree

2 files changed

+48
-5
lines changed

2 files changed

+48
-5
lines changed

llvm/lib/Support/APInt.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1914,12 +1914,19 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
19141914
}
19151915

19161916
APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1917-
APInt Res = *this * RHS;
1917+
if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1918+
Overflow = true;
1919+
return *this * RHS;
1920+
}
19181921

1919-
if (*this != 0 && RHS != 0)
1920-
Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1921-
else
1922-
Overflow = false;
1922+
APInt Res = lshr(1) * RHS;
1923+
Overflow = Res.isNegative();
1924+
Res <<= 1;
1925+
if ((*this)[0]) {
1926+
Res += RHS;
1927+
if (Res.ult(RHS))
1928+
Overflow = true;
1929+
}
19231930
return Res;
19241931
}
19251932

llvm/unittests/ADT/APIntTest.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2381,6 +2381,42 @@ TEST(APIntTest, RoundingSDiv) {
23812381
}
23822382
}
23832383

2384+
TEST(APIntTest, umul_ov) {
2385+
const std::pair<uint64_t, uint64_t> Overflows[] = {
2386+
{0x8000000000000000, 2},
2387+
{0x5555555555555556, 3},
2388+
{4294967296, 4294967296},
2389+
{4294967295, 4294967298},
2390+
};
2391+
const std::pair<uint64_t, uint64_t> NonOverflows[] = {
2392+
{0x7fffffffffffffff, 2},
2393+
{0x5555555555555555, 3},
2394+
{4294967295, 4294967297},
2395+
};
2396+
2397+
bool Overflow;
2398+
for (auto &X : Overflows) {
2399+
APInt A(64, X.first);
2400+
APInt B(64, X.second);
2401+
(void)A.umul_ov(B, Overflow);
2402+
EXPECT_TRUE(Overflow);
2403+
}
2404+
for (auto &X : NonOverflows) {
2405+
APInt A(64, X.first);
2406+
APInt B(64, X.second);
2407+
(void)A.umul_ov(B, Overflow);
2408+
EXPECT_FALSE(Overflow);
2409+
}
2410+
2411+
for (unsigned Bits = 1; Bits <= 5; ++Bits)
2412+
for (unsigned A = 0; A != 1u << Bits; ++A)
2413+
for (unsigned B = 0; B != 1u << Bits; ++B) {
2414+
APInt C = APInt(Bits, A).umul_ov(APInt(Bits, B), Overflow);
2415+
APInt D = APInt(2 * Bits, A) * APInt(2 * Bits, B);
2416+
EXPECT_TRUE(D.getHiBits(Bits).isNullValue() != Overflow);
2417+
}
2418+
}
2419+
23842420
TEST(APIntTest, SolveQuadraticEquationWrap) {
23852421
// Verify that "Solution" is the first non-negative integer that solves
23862422
// Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution

0 commit comments

Comments
 (0)