Skip to content

Commit

Permalink
Emit LLVM lifetime intrinsics to improve stack usage and codegen in g…
Browse files Browse the repository at this point in the history
…eneral

Lifetime intrinsics help to reduce stack usage, because LLVM can apply
stack coloring to reuse the stack slots of dead allocas for new ones.

For example, these functions now both use the same amount of stack, while
previously `bar()` used five times as much as `foo()`:

````rust
fn foo() {
  println!("{}", 5);
}

fn bar() {
  println!("{}", 5);
  println!("{}", 5);
  println!("{}", 5);
  println!("{}", 5);
  println!("{}", 5);
}
````

On top of that, LLVM can also optimize out certain operations when it
knows that memory is dead after a certain point. For example, it can
sometimes remove the zeroing used to cancel the drop glue. This is
possible when the drop glue itself was already removed because the
zeroing dominated the drop glue call. For example, in:

````rust
pub fn bar(x: (Box<int>, int)) -> (Box<int>, int) {
    x
}
````

With optimizations, this currently results in:

````llvm
define void @_ZN3bar20h330fa42547df8179niaE({ i64*, i64 }* noalias nocapture nonnull sret, { i64*, i64 }* noalias nocapture nonnull) unnamed_addr #0 {
"_ZN29_$LP$Box$LT$int$GT$$C$int$RP$39glue_drop.$x22glue_drop$x22$LP$1347$RP$17h88cf42702e5a322aE.exit":
  %2 = bitcast { i64*, i64 }* %1 to i8*
  %3 = bitcast { i64*, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
  tail call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 16, i32 8, i1 false)
  ret void
}
````

But with lifetime intrinsics we get:

````llvm
define void @_ZN3bar20h330fa42547df8179niaE({ i64*, i64 }* noalias nocapture nonnull sret, { i64*, i64 }* noalias nocapture nonnull) unnamed_addr #0 {
"_ZN29_$LP$Box$LT$int$GT$$C$int$RP$39glue_drop.$x22glue_drop$x22$LP$1347$RP$17h88cf42702e5a322aE.exit":
  %2 = bitcast { i64*, i64 }* %1 to i8*
  %3 = bitcast { i64*, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
  tail call void @llvm.lifetime.end(i64 16, i8* %2)
  ret void
}
````

Fixes #15665
  • Loading branch information
dotdash committed Jul 22, 2014
1 parent 8748a69 commit 92d1f15
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 8 deletions.
1 change: 1 addition & 0 deletions src/librustc/middle/trans/_match.rs
Expand Up @@ -1603,6 +1603,7 @@ fn mk_binding_alloca<'a,A>(bcx: &'a Block<'a>,
// Subtle: be sure that we *populate* the memory *before*
// we schedule the cleanup.
let bcx = populate(arg, bcx, llval, var_ty);
bcx.fcx.schedule_lifetime_end(cleanup_scope, llval);
bcx.fcx.schedule_drop_mem(cleanup_scope, llval, var_ty);

// Now that memory is initialized and has cleanup scheduled,
Expand Down
34 changes: 33 additions & 1 deletion src/librustc/middle/trans/base.rs
Expand Up @@ -1070,6 +1070,34 @@ pub fn with_cond<'a>(
next_cx
}

/// Emits a call to the `llvm.lifetime.start` intrinsic for the memory that
/// `ptr` points to.
///
/// Does nothing when the session's optimization level is `config::No`.
pub fn call_lifetime_start(cx: &Block, ptr: ValueRef) {
    call_lifetime_intrinsic(cx, ptr, "lifetime_start", "llvm.lifetime.start");
}

/// Emits a call to the `llvm.lifetime.end` intrinsic for the memory that
/// `ptr` points to.
///
/// Does nothing when the session's optimization level is `config::No`.
pub fn call_lifetime_end(cx: &Block, ptr: ValueRef) {
    call_lifetime_intrinsic(cx, ptr, "lifetime_end", "llvm.lifetime.end");
}

/// Shared implementation of `call_lifetime_start` and `call_lifetime_end`.
///
/// * `ctxt` is the label pushed via `push_ctxt` while the call is built.
/// * `intrinsic` is the name looked up through `get_intrinsic`.
///
/// Keeping both markers in one place guarantees that the start and end
/// calls always agree on the size and pointer type they pass.
fn call_lifetime_intrinsic(cx: &Block,
                           ptr: ValueRef,
                           ctxt: &'static str,
                           intrinsic: &'static str) {
    // Skip the markers entirely for unoptimized builds.
    if cx.sess().opts.optimize == config::No {
        return;
    }

    let _icx = push_ctxt(ctxt);
    let ccx = cx.ccx();

    // The size argument is derived from the pointee type of `ptr`.
    // NOTE(review): for pointers coming from `arrayalloca` this presumably
    // covers a single element rather than the whole array -- confirm.
    let llsize = C_u64(ccx, machine::llsize_of_alloc(ccx, val_ty(ptr).element_type()));
    // The intrinsics are declared as taking an `i8*`, so cast first.
    let ptr = PointerCast(cx, ptr, Type::i8p(ccx));
    let llfn = ccx.get_intrinsic(&intrinsic);
    Call(cx, llfn, [llsize, ptr], []);
}

pub fn call_memcpy(cx: &Block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef, align: u32) {
let _icx = push_ctxt("call_memcpy");
let ccx = cx.ccx();
Expand Down Expand Up @@ -1157,6 +1185,8 @@ pub fn alloca_maybe_zeroed(cx: &Block, ty: Type, name: &str, zero: bool) -> Valu
let b = cx.fcx.ccx.builder();
b.position_before(cx.fcx.alloca_insert_pt.get().unwrap());
memzero(&b, p, ty);
} else {
call_lifetime_start(cx, p);
}
p
}
Expand All @@ -1169,7 +1199,9 @@ pub fn arrayalloca(cx: &Block, ty: Type, v: ValueRef) -> ValueRef {
}
}
debuginfo::clear_source_location(cx.fcx);
return ArrayAlloca(cx, ty, v);
let p = ArrayAlloca(cx, ty, v);
call_lifetime_start(cx, p);
p
}

// Creates and returns space for, or returns the argument representing, the
Expand Down
32 changes: 32 additions & 0 deletions src/librustc/middle/trans/cleanup.rs
Expand Up @@ -226,6 +226,20 @@ impl<'a> CleanupMethods<'a> for FunctionContext<'a> {
self.trans_cleanups_to_exit_scope(ReturnExit)
}

/// Registers a `LifetimeEnd` cleanup for `val` in the scope identified by
/// `cleanup_scope`.
fn schedule_lifetime_end(&self, cleanup_scope: ScopeId, val: ValueRef) {
    debug!("schedule_lifetime_end({:?}, val={})",
           cleanup_scope,
           self.ccx.tn.val_to_string(val));

    // Box the cleanup so it can be stored behind the `Cleanup` trait object.
    let cleanup = box LifetimeEnd { ptr: val };
    self.schedule_clean(cleanup_scope, cleanup as Box<Cleanup>);
}

fn schedule_drop_mem(&self,
cleanup_scope: ScopeId,
val: ValueRef,
Expand Down Expand Up @@ -902,6 +916,21 @@ impl Cleanup for FreeValue {
}
}

/// Cleanup that ends the lifetime of the memory behind `ptr` by calling
/// `base::call_lifetime_end` when the cleanup is translated.
pub struct LifetimeEnd {
// Pointer handed to `base::call_lifetime_end`.
ptr: ValueRef,
}

impl Cleanup for LifetimeEnd {
// Always reports `false`; presumably this tells the cleanup machinery not
// to run this cleanup on unwind paths -- confirm against `Cleanup` users.
fn clean_on_unwind(&self) -> bool {
false
}

// Emits the lifetime-end call into `bcx` and returns the same block
// unchanged.
fn trans<'a>(&self, bcx: &'a Block<'a>) -> &'a Block<'a> {
base::call_lifetime_end(bcx, self.ptr);
bcx
}
}

pub fn temporary_scope(tcx: &ty::ctxt,
id: ast::NodeId)
-> ScopeId {
Expand Down Expand Up @@ -957,6 +986,9 @@ pub trait CleanupMethods<'a> {
cleanup_scope: ast::NodeId,
exit: uint) -> BasicBlockRef;
fn return_exit_block(&'a self) -> BasicBlockRef;
fn schedule_lifetime_end(&self,
cleanup_scope: ScopeId,
val: ValueRef);
fn schedule_drop_mem(&self,
cleanup_scope: ScopeId,
val: ValueRef,
Expand Down
3 changes: 3 additions & 0 deletions src/librustc/middle/trans/context.rs
Expand Up @@ -425,6 +425,9 @@ fn declare_intrinsic(ccx: &CrateContext, key: & &'static str) -> Option<ValueRef
ifn!("llvm.umul.with.overflow.i32" fn(t_i32, t_i32) -> mk_struct!{t_i32, i1});
ifn!("llvm.umul.with.overflow.i64" fn(t_i64, t_i64) -> mk_struct!{t_i64, i1});

ifn!("llvm.lifetime.start" fn(t_i64,i8p) -> void);
ifn!("llvm.lifetime.end" fn(t_i64, i8p) -> void);

ifn!("llvm.expect.i1" fn(i1, i1) -> i1);

// Some intrinsics were introduced in later versions of LLVM, but they have
Expand Down
6 changes: 5 additions & 1 deletion src/librustc/middle/trans/datum.rs
Expand Up @@ -124,6 +124,7 @@ pub fn lvalue_scratch_datum<'a, A>(bcx: &'a Block<'a>,

// Subtle. Populate the scratch memory *before* scheduling cleanup.
let bcx = populate(arg, bcx, scratch);
bcx.fcx.schedule_lifetime_end(scope, scratch);
bcx.fcx.schedule_drop_mem(scope, scratch, ty);

DatumBlock::new(bcx, Datum::new(scratch, ty, Lvalue))
Expand Down Expand Up @@ -169,7 +170,10 @@ fn add_rvalue_clean(mode: RvalueMode,
ty: ty::t) {
match mode {
ByValue => { fcx.schedule_drop_immediate(scope, val, ty); }
ByRef => { fcx.schedule_drop_mem(scope, val, ty); }
ByRef => {
fcx.schedule_lifetime_end(scope, val);
fcx.schedule_drop_mem(scope, val, ty);
}
}
}

Expand Down
5 changes: 3 additions & 2 deletions src/librustc/middle/trans/expr.rs
Expand Up @@ -1145,8 +1145,9 @@ pub fn trans_adt<'a>(bcx: &'a Block<'a>,
let dest = adt::trans_field_ptr(bcx, repr, addr, discr, i);
let e_ty = expr_ty_adjusted(bcx, &**e);
bcx = trans_into(bcx, &**e, SaveIn(dest));
fcx.schedule_drop_mem(cleanup::CustomScope(custom_cleanup_scope),
dest, e_ty);
let scope = cleanup::CustomScope(custom_cleanup_scope);
fcx.schedule_lifetime_end(scope, dest);
fcx.schedule_drop_mem(scope, dest, e_ty);
}

for base in optbase.iter() {
Expand Down
8 changes: 4 additions & 4 deletions src/librustc/middle/trans/tvec.rs
Expand Up @@ -170,6 +170,7 @@ pub fn trans_slice_vstore<'a>(
let llfixed_ty = type_of::type_of(bcx.ccx(), fixed_ty).ptr_to();
let llfixed_casted = BitCast(bcx, llfixed, llfixed_ty);
let cleanup_scope = cleanup::temporary_scope(bcx.tcx(), content_expr.id);
fcx.schedule_lifetime_end(cleanup_scope, llfixed_casted);
fcx.schedule_drop_mem(cleanup_scope, llfixed_casted, fixed_ty);

// Generate the content into the backing array.
Expand Down Expand Up @@ -364,10 +365,9 @@ pub fn write_content<'a>(
i, bcx.val_to_string(lleltptr));
bcx = expr::trans_into(bcx, &**element,
SaveIn(lleltptr));
fcx.schedule_drop_mem(
cleanup::CustomScope(temp_scope),
lleltptr,
vt.unit_ty);
let scope = cleanup::CustomScope(temp_scope);
fcx.schedule_lifetime_end(scope, lleltptr);
fcx.schedule_drop_mem(scope, lleltptr, vt.unit_ty);
}
fcx.pop_custom_cleanup_scope(temp_scope);
}
Expand Down

5 comments on commit 92d1f15

@bors
Copy link
Contributor

@bors bors commented on 92d1f15 Jul 22, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

saw approval from alexcrichton
at dotdash@92d1f15

@bors
Copy link
Contributor

@bors bors commented on 92d1f15 Jul 22, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merging dotdash/rust/lifetimes3 = 92d1f15 into auto

@bors
Copy link
Contributor

@bors bors commented on 92d1f15 Jul 22, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dotdash/rust/lifetimes3 = 92d1f15 merged ok, testing candidate = 31c908b

@bors
Copy link
Contributor

@bors bors commented on 92d1f15 Jul 22, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fast-forwarding master to auto = 31c908b

Please sign in to comment.