Skip to content

Commit

Permalink
Remove the intrinsic for align_offset
Browse files Browse the repository at this point in the history
Keep only the language item. This removes some indirection and makes
codegen worse for debug builds, but simplifies code significantly, which
is a good tradeoff to make, in my opinion.

Besides, the codegen can be improved even further with some constant
evaluation improvements that we expect to happen in the future.
  • Loading branch information
nagisa committed May 17, 2018
1 parent 680031b commit 6d5bf8b
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 84 deletions.
42 changes: 0 additions & 42 deletions src/libcore/intrinsics.rs
Expand Up @@ -1467,48 +1467,6 @@ extern "rust-intrinsic" {
/// docs my friends, its friday!
pub fn align_offset(ptr: *const (), align: usize) -> usize;

/// Computes the offset that needs to be applied to the pointer in order to make it aligned to
/// `align`.
///
/// If it is not possible to align the pointer, the implementation returns
/// `usize::max_value()`.
///
/// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
/// used with the `offset` or `offset_to` methods.
///
/// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
/// beyond the allocation that the pointer points into. It is up to the caller to ensure that
/// the returned offset is correct in all terms other than alignment.
///
/// # Unsafety
///
/// `align` must be a power-of-two.
///
/// # Examples
///
/// Accessing adjacent `u8` as `u16`
///
/// ```
/// # #![feature(core_intrinsics)]
/// # fn foo(n: usize) {
/// # use std::intrinsics::align_offset;
/// # use std::mem::align_of;
/// # unsafe {
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
/// let ptr = &x[n] as *const u8;
/// let offset = align_offset(ptr, align_of::<u16>());
/// if offset < x.len() - n - 1 {
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
/// assert_ne!(*u16_ptr, 500);
/// } else {
/// // while the pointer can be aligned via `offset`, it would point
/// // outside the allocation
/// }
/// # } }
/// ```
#[cfg(not(stage0))]
pub fn align_offset<T>(ptr: *const T, align: usize) -> usize;

/// Emits a `!nontemporal` store according to LLVM (see their docs).
/// Probably will never become stable.
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
Expand Down
31 changes: 21 additions & 10 deletions src/libcore/ptr.rs
Expand Up @@ -1478,7 +1478,7 @@ impl<T: ?Sized> *const T {
panic!("align_offset: align is not a power-of-two");
}
unsafe {
intrinsics::align_offset(self, align)
align_offset(self, align)
}
}

Expand Down Expand Up @@ -2543,7 +2543,7 @@ impl<T: ?Sized> *mut T {
panic!("align_offset: align is not a power-of-two");
}
unsafe {
intrinsics::align_offset(self, align)
align_offset(self, align)
}
}

Expand All @@ -2565,8 +2565,6 @@ impl<T: ?Sized> *mut T {
/// Calculate offset (in terms of elements of `stride` stride) that has to be applied
/// to pointer `p` so that pointer `p` would get aligned to `a`.
///
/// This is an implementation of the `align_offset` intrinsic for the case where `stride > 1`.
///
/// Note: This implementation has been carefully tailored to not panic. It is UB for this to panic.
/// The only real change that can be made here is change of `INV_TABLE_MOD_16` and associated
/// constants.
Expand All @@ -2578,7 +2576,7 @@ impl<T: ?Sized> *mut T {
/// Any questions go to @nagisa.
#[lang="align_offset"]
#[cfg(not(stage0))]
unsafe fn align_offset(p: *const (), a: usize, stride: usize) -> usize {
pub(crate) unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
/// Calculate multiplicative modular inverse of `x` modulo `m`.
///
/// This implementation is tailored for align_offset and has following preconditions:
Expand All @@ -2587,12 +2585,13 @@ unsafe fn align_offset(p: *const (), a: usize, stride: usize) -> usize {
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
///
/// Implementation of this function shall not panic. Ever.
#[inline]
fn mod_inv(x: usize, m: usize) -> usize {
/// Multiplicative modular inverse table modulo 2⁴ = 16.
///
/// Note that this table does not contain values for which the inverse does not exist (i.e. for
/// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
static INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
const INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
const INV_TABLE_MOD: usize = 16;
/// INV_TABLE_MOD²
Expand Down Expand Up @@ -2627,18 +2626,30 @@ unsafe fn align_offset(p: *const (), a: usize, stride: usize) -> usize {
}
}

let stride = ::mem::size_of::<T>();
let a_minus_one = a.wrapping_sub(1);
let pmoda = p as usize & a_minus_one;
let smoda = stride & a_minus_one;
// a is power-of-two so cannot be 0. stride = 0 is handled by the intrinsic.
let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
let gcd = 1usize << gcdpow;

if pmoda == 0 {
// Already aligned. Yay!
return 0;
}

if stride <= 1 {
return if stride == 0 {
// If the pointer is not aligned, and the element is zero-sized, then no amount of
// elements will ever align the pointer.
!0
} else {
a.wrapping_sub(pmoda)
};
}

let smoda = stride & a_minus_one;
// a is power-of-two so cannot be 0. stride = 0 is handled above.
let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
let gcd = 1usize << gcdpow;

if gcd == 1 {
// This branch solves for the variable $o$ in following linear congruence equation:
//
Expand Down
7 changes: 5 additions & 2 deletions src/libcore/slice/mod.rs
Expand Up @@ -1794,8 +1794,11 @@ impl<T> [T] {
// handle ZSTs specially, which is – don't handle them at all.
return (self, &[], &[]);
}

// First, find the point at which we split between the first and second slice. Easy with
// ptr.align_offset.
let ptr = self.as_ptr();
let offset = ::intrinsics::align_offset(ptr, ::mem::align_of::<U>());
let offset = ::ptr::align_offset(ptr, ::mem::align_of::<U>());
if offset > self.len() {
return (self, &[], &[]);
} else {
Expand Down Expand Up @@ -1848,7 +1851,7 @@ impl<T> [T] {
// First, find the point at which we split between the first and second slice. Easy with
// ptr.align_offset.
let ptr = self.as_ptr();
let offset = ::intrinsics::align_offset(ptr, ::mem::align_of::<U>());
let offset = ::ptr::align_offset(ptr, ::mem::align_of::<U>());
if offset > self.len() {
return (self, &mut [], &mut []);
} else {
Expand Down
26 changes: 0 additions & 26 deletions src/librustc_codegen_llvm/intrinsic.rs
Expand Up @@ -25,7 +25,6 @@ use type_of::LayoutLlvmExt;
use rustc::ty::{self, Ty};
use rustc::ty::layout::{HasDataLayout, LayoutOf};
use rustc::hir;
use rustc::middle::lang_items::AlignOffsetLangItem;
use syntax::ast;
use syntax::symbol::Symbol;
use builder::Builder;
Expand Down Expand Up @@ -390,31 +389,6 @@ pub fn codegen_intrinsic_call<'a, 'tcx>(bx: &Builder<'a, 'tcx>,
args[0].deref(bx.cx).codegen_get_discr(bx, ret_ty)
}

"align_offset" => {
let (ptr, align) = (args[0].immediate(), args[1].immediate());
let stride_of_t = bx.cx.layout_of(substs.type_at(0)).size_and_align().0.bytes();
let stride = C_usize(bx.cx, stride_of_t);
let zero = C_null(bx.cx.isize_ty);
let max = C_int(cx.isize_ty, -1); // -1isize (wherein I cheat horribly to make !0usize)

if stride_of_t <= 1 {
// offset = ptr as usize % align => offset = ptr as usize & (align - 1)
let modmask = bx.sub(align, C_usize(bx.cx, 1));
let offset = bx.and(bx.ptrtoint(ptr, bx.cx.isize_ty), modmask);
let is_zero = bx.icmp(llvm::IntPredicate::IntEQ, offset, zero);
// if offset == 0 { 0 } else { if stride_of_t == 1 { align - offset } else { !0 } }
bx.select(is_zero, zero, if stride_of_t == 1 {
bx.sub(align, offset)
} else {
max
})
} else {
let did = ::common::langcall(bx.tcx(), Some(span), "", AlignOffsetLangItem);
let instance = ty::Instance::mono(bx.tcx(), did);
let llfn = ::callee::get_fn(bx.cx, instance);
bx.call(llfn, &[ptr, align, stride], None)
}
}
name if name.starts_with("simd_") => {
match generic_simd_intrinsic(bx, name,
callee_ty,
Expand Down
4 changes: 0 additions & 4 deletions src/librustc_typeck/check/intrinsic.rs
Expand Up @@ -314,10 +314,6 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
(0, vec![tcx.mk_fn_ptr(fn_ty), mut_u8, mut_u8], tcx.types.i32)
}

"align_offset" => {
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.types.usize], tcx.types.usize)
},

"nontemporal_store" => {
(1, vec![ tcx.mk_mut_ptr(param(0)), param(0) ], tcx.mk_nil())
}
Expand Down

0 comments on commit 6d5bf8b

Please sign in to comment.