Skip to content

Commit

Permalink
Improve codegen of align_offset when stride == 1
Browse files Browse the repository at this point in the history
Previously, checking for `pmoda == 0` first would cause LLVM to generate
branchy code, even though for `stride == 1` the offset can be computed
without such a branch, effectively as `-p % a`.

For well-known (constant) alignments, with the new ordering of these
conditionals, we end up generating 2 to 3 cheap instructions on x86_64:

    movq    %rdi, %rax
    negl    %eax
    andl    $7, %eax

instead of 5+ as previously.

For unknown alignments the new code also generates just 3 instructions:

    negq    %rdi
    leaq    -1(%rsi), %rax
    andq    %rdi, %rax
  • Loading branch information
nagisa committed Aug 16, 2020
1 parent e7271da commit 5d22b18
Showing 1 changed file with 11 additions and 13 deletions.
24 changes: 11 additions & 13 deletions library/core/src/ptr/mod.rs
Expand Up @@ -1172,7 +1172,7 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {

/// Calculate multiplicative modular inverse of `x` modulo `m`.
///
/// This implementation is tailored for align_offset and has following preconditions:
/// This implementation is tailored for `align_offset` and has the following preconditions:
///
/// * `m` is a power-of-two;
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
Expand Down Expand Up @@ -1220,23 +1220,21 @@ pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
}

let stride = mem::size_of::<T>();
// SAFETY: `a` is a power-of-two, hence non-zero.
// SAFETY: `a` is a power-of-two, therefore non-zero.
let a_minus_one = unsafe { unchecked_sub(a, 1) };
let pmoda = p as usize & a_minus_one;
if stride == 1 {
// The `stride == 1` case can be computed more efficiently through `-p (mod a)`.
return wrapping_sub(0, p as usize) & a_minus_one;
}

let pmoda = p as usize & a_minus_one;
if pmoda == 0 {
// Already aligned. Yay!
return 0;
}

if stride <= 1 {
return if stride == 0 {
// If the pointer is not aligned, and the element is zero-sized, then no amount of
// elements will ever align the pointer.
!0
} else {
wrapping_sub(a, pmoda)
};
} else if stride == 0 {
// If the pointer is not aligned, and the element is zero-sized, then no amount of
// elements will ever align the pointer.
return usize::MAX;
}

let smoda = stride & a_minus_one;
Expand Down

0 comments on commit 5d22b18

Please sign in to comment.