From 5d22b18bf28a54586008e9d8d4bfd72012c00e2e Mon Sep 17 00:00:00 2001 From: Simonas Kazlauskas Date: Sun, 16 Aug 2020 17:10:54 +0300 Subject: [PATCH] Improve codegen of align_offset when stride == 1 Previously checking for `pmoda == 0` would get LLVM to generate branchy code, when, for `stride = 1` the offset can be computed without such a branch by doing effectively a `-p % a`. For well-known (constant) alignments, with the new ordering of these conditionals, we end up generating 2 to 3 cheap instructions on x86_64: movq %rdi, %rax negl %eax andl $7, %eax instead of 5+ as previously. For unknown alignments the new code also generates just 3 instructions: negq %rdi leaq -1(%rsi), %rax andq %rdi, %rax --- library/core/src/ptr/mod.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 78308f97461c1..68b5d1df71cb2 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { /// Calculate multiplicative modular inverse of `x` modulo `m`. /// - /// This implementation is tailored for align_offset and has following preconditions: + /// This implementation is tailored for `align_offset` and has following preconditions: /// /// * `m` is a power-of-two; /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead) @@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { } let stride = mem::size_of::(); - // SAFETY: `a` is a power-of-two, hence non-zero. + // SAFETY: `a` is a power-of-two, therefore non-zero. let a_minus_one = unsafe { unchecked_sub(a, 1) }; - let pmoda = p as usize & a_minus_one; + if stride == 1 { + // `stride == 1` case can be computed more efficiently through `-p (mod a)`. + return wrapping_sub(0, p as usize) & a_minus_one; + } + let pmoda = p as usize & a_minus_one; if pmoda == 0 { // Already aligned. Yay! return 0; - } - - if stride <= 1 { - return if stride == 0 { - // If the pointer is not aligned, and the element is zero-sized, then no amount of - // elements will ever align the pointer. - !0 - } else { - wrapping_sub(a, pmoda) - }; + } else if stride == 0 { + // If the pointer is not aligned, and the element is zero-sized, then no amount of + // elements will ever align the pointer. + return usize::MAX; } let smoda = stride & a_minus_one;