Skip to content

Commit

Permalink
Add a compile-time error when oversized types are used
Browse files Browse the repository at this point in the history
LLVM generates wrong code (which may be an instance of compile-time undefined behaviour) when
faced with types that occupy large amounts of memory — larger than the address space itself.
Make using such types a trans error. While trans errors are bad, overbig
types are expected to be very rare.
  • Loading branch information
arielb1 committed Oct 15, 2014
1 parent 01d693b commit e053dfa
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 65 deletions.
130 changes: 87 additions & 43 deletions src/librustc/middle/trans/adt.rs
Expand Up @@ -163,7 +163,7 @@ pub fn represent_type(cx: &CrateContext, t: ty::t) -> Rc<Repr> {
fn represent_type_uncached(cx: &CrateContext, t: ty::t) -> Repr {
match ty::get(t).sty {
ty::ty_tup(ref elems) => {
return Univariant(mk_struct(cx, elems.as_slice(), false), false)
return Univariant(mk_struct(cx, elems.as_slice(), false, t), false)
}
ty::ty_struct(def_id, ref substs) => {
let fields = ty::lookup_struct_fields(cx.tcx(), def_id);
Expand All @@ -174,12 +174,12 @@ fn represent_type_uncached(cx: &CrateContext, t: ty::t) -> Repr {
let dtor = ty::ty_dtor(cx.tcx(), def_id).has_drop_flag();
if dtor { ftys.push(ty::mk_bool()); }

return Univariant(mk_struct(cx, ftys.as_slice(), packed), dtor)
return Univariant(mk_struct(cx, ftys.as_slice(), packed, t), dtor)
}
ty::ty_unboxed_closure(def_id, _) => {
let upvars = ty::unboxed_closure_upvars(cx.tcx(), def_id);
let upvar_types = upvars.iter().map(|u| u.ty).collect::<Vec<_>>();
return Univariant(mk_struct(cx, upvar_types.as_slice(), false),
return Univariant(mk_struct(cx, upvar_types.as_slice(), false, t),
false)
}
ty::ty_enum(def_id, ref substs) => {
Expand All @@ -194,7 +194,8 @@ fn represent_type_uncached(cx: &CrateContext, t: ty::t) -> Repr {
// (Typechecking will reject discriminant-sizing attrs.)
assert_eq!(hint, attr::ReprAny);
let ftys = if dtor { vec!(ty::mk_bool()) } else { vec!() };
return Univariant(mk_struct(cx, ftys.as_slice(), false), dtor);
return Univariant(mk_struct(cx, ftys.as_slice(), false, t),
dtor);
}

if !dtor && cases.iter().all(|c| c.tys.len() == 0) {
Expand Down Expand Up @@ -225,15 +226,17 @@ fn represent_type_uncached(cx: &CrateContext, t: ty::t) -> Repr {
assert_eq!(hint, attr::ReprAny);
let mut ftys = cases.get(0).tys.clone();
if dtor { ftys.push(ty::mk_bool()); }
return Univariant(mk_struct(cx, ftys.as_slice(), false), dtor);
return Univariant(mk_struct(cx, ftys.as_slice(), false, t),
dtor);
}

if !dtor && cases.len() == 2 && hint == attr::ReprAny {
// Nullable pointer optimization
let mut discr = 0;
while discr < 2 {
if cases.get(1 - discr).is_zerolen(cx) {
let st = mk_struct(cx, cases.get(discr).tys.as_slice(), false);
if cases.get(1 - discr).is_zerolen(cx, t) {
let st = mk_struct(cx, cases.get(discr).tys.as_slice(),
false, t);
match cases.get(discr).find_ptr() {
Some(ThinPointer(_)) if st.fields.len() == 1 => {
return RawNullablePointer {
Expand Down Expand Up @@ -263,11 +266,15 @@ fn represent_type_uncached(cx: &CrateContext, t: ty::t) -> Repr {
slo: 0, shi: (cases.len() - 1) as i64 };
let ity = range_to_inttype(cx, hint, &bounds);

return General(ity, cases.iter().map(|c| {
let fields : Vec<_> = cases.iter().map(|c| {
let mut ftys = vec!(ty_of_inttype(ity)).append(c.tys.as_slice());
if dtor { ftys.push(ty::mk_bool()); }
mk_struct(cx, ftys.as_slice(), false)
}).collect(), dtor);
mk_struct(cx, ftys.as_slice(), false, t)
}).collect();

ensure_enum_fits_in_address_space(cx, ity, fields.as_slice(), t);

General(ity, fields, dtor)
}
_ => cx.sess().bug(format!("adt::represent_type called on non-ADT type: {}",
ty_to_string(cx.tcx(), t)).as_slice())
Expand All @@ -288,8 +295,8 @@ pub enum PointerField {
}

impl Case {
fn is_zerolen(&self, cx: &CrateContext) -> bool {
mk_struct(cx, self.tys.as_slice(), false).size == 0
// A variant is zero-length when the struct built from its field types has
// size zero.  `scapegoat` is the source-level type blamed if the layout
// turns out to be too big for the address space.
fn is_zerolen(&self, cx: &CrateContext, scapegoat: ty::t) -> bool {
    let st = mk_struct(cx, self.tys.as_slice(), false, scapegoat);
    st.size == 0
}

fn find_ptr(&self) -> Option<PointerField> {
Expand Down Expand Up @@ -344,29 +351,25 @@ fn get_cases(tcx: &ty::ctxt, def_id: ast::DefId, substs: &subst::Substs) -> Vec<
}).collect()
}

fn mk_struct(cx: &CrateContext, tys: &[ty::t], packed: bool) -> Struct {
if tys.iter().all(|&ty| ty::type_is_sized(cx.tcx(), ty)) {
let lltys = tys.iter().map(|&ty| type_of::sizing_type_of(cx, ty)).collect::<Vec<_>>();
let llty_rec = Type::struct_(cx, lltys.as_slice(), packed);
Struct {
size: machine::llsize_of_alloc(cx, llty_rec),
align: machine::llalign_of_min(cx, llty_rec),
sized: true,
packed: packed,
fields: Vec::from_slice(tys),
}
// Build the `Struct` layout description for a record with the given field
// types, verifying that the resulting layout fits in the address space.
// `scapegoat` is the source-level type blamed in the error if it does not.
fn mk_struct(cx: &CrateContext, tys: &[ty::t], packed: bool, scapegoat: ty::t) -> Struct {
    let sized = tys.iter().all(|&ty| ty::type_is_sized(cx.tcx(), ty));

    // Dynamically sized fields have no sizing type, so drop them.  When every
    // field is sized the filter keeps everything, so a single pipeline serves
    // both the sized and unsized cases.
    let lltys: Vec<Type> = tys.iter()
                              .filter(|&ty| ty::type_is_sized(cx.tcx(), *ty))
                              .map(|&ty| type_of::sizing_type_of(cx, ty))
                              .collect();

    ensure_struct_fits_in_address_space(cx, lltys.as_slice(), packed, scapegoat);

    let llty_rec = Type::struct_(cx, lltys.as_slice(), packed);
    Struct {
        size: machine::llsize_of_alloc(cx, llty_rec),
        align: machine::llalign_of_min(cx, llty_rec),
        sized: sized,
        packed: packed,
        fields: Vec::from_slice(tys),
    }
}

Expand Down Expand Up @@ -461,6 +464,48 @@ pub fn ty_of_inttype(ity: IntType) -> ty::t {
}
}

// LLVM miscompiles aggregates whose size exceeds the address space, so
// reject any struct layout that would grow that large.  `scapegoat` is the
// source-level type named in the resulting fatal error.
fn ensure_struct_fits_in_address_space(ccx: &CrateContext,
                                       fields: &[Type],
                                       packed: bool,
                                       scapegoat: ty::t) {
    let mut running_size = 0;
    for &field_ty in fields.iter() {
        // Unpacked structs pad each field up to its natural alignment.
        if !packed {
            let field_align = machine::llalign_of_min(ccx, field_ty);
            running_size = roundup(running_size, field_align);
        }
        running_size += machine::llsize_of_alloc(ccx, field_ty);

        // One overflow check per field is sufficient: individual field sizes
        // are below 1<<60, so the sum cannot wrap around between checks.
        if running_size >= ccx.max_obj_size() {
            ccx.report_overbig_object(scapegoat);
        }
    }
}

// Size and alignment of a union of the given structs: big enough for the
// largest member and aligned to the most demanding one.
fn union_size_and_align(sts: &[Struct]) -> (machine::llsize, machine::llalign) {
    let align = sts.iter().max_by(|st| st.align).unwrap().align;
    let size = sts.iter().map(|st| st.size).max().unwrap();
    (size, align)
}

// Reject enum layouts (discriminant + largest variant payload) that would
// not fit in the address space.  `scapegoat` is the type blamed on error.
fn ensure_enum_fits_in_address_space(ccx: &CrateContext,
                                     discr: IntType,
                                     fields: &[Struct],
                                     scapegoat: ty::t) {
    let discr_size = machine::llsize_of_alloc(ccx, ll_inttype(ccx, discr));
    let (payload_size, payload_align) = union_size_and_align(fields);

    // No overflow possible here: discr_size, payload_size and payload_align
    // are each below 1<<60.
    let needed = roundup(discr_size, payload_align) + payload_size;

    if needed >= ccx.max_obj_size() {
        ccx.report_overbig_object(scapegoat);
    }
}


/**
* LLVM-level types are a little complicated.
Expand Down Expand Up @@ -523,13 +568,12 @@ fn generic_type_of(cx: &CrateContext,
// of the size.
//
// FIXME #10604: this breaks when vector types are present.
let size = sts.iter().map(|st| st.size).max().unwrap();
let most_aligned = sts.iter().max_by(|st| st.align).unwrap();
let align = most_aligned.align;
let (size, align) = union_size_and_align(sts.as_slice());
let align_s = align as u64;
let discr_ty = ll_inttype(cx, ity);
let discr_size = machine::llsize_of_alloc(cx, discr_ty) as u64;
let align_units = (size + align - 1) / align - 1;
let pad_ty = match align {
let discr_size = machine::llsize_of_alloc(cx, discr_ty);
let align_units = (size + align_s - 1) / align_s - 1;
let pad_ty = match align_s {
1 => Type::array(&Type::i8(cx), align_units),
2 => Type::array(&Type::i16(cx), align_units),
4 => Type::array(&Type::i32(cx), align_units),
Expand All @@ -539,10 +583,10 @@ fn generic_type_of(cx: &CrateContext,
align_units),
_ => fail!("unsupported enum alignment: {:?}", align)
};
assert_eq!(machine::llalign_of_min(cx, pad_ty) as u64, align);
assert_eq!(align % discr_size, 0);
assert_eq!(machine::llalign_of_min(cx, pad_ty), align);
assert_eq!(align_s % discr_size, 0);
let fields = vec!(discr_ty,
Type::array(&discr_ty, align / discr_size - 1),
Type::array(&discr_ty, align_s / discr_size - 1),
pad_ty);
match name {
None => Type::struct_(cx, fields.as_slice(), false),
Expand Down
11 changes: 11 additions & 0 deletions src/librustc/middle/trans/context.rs
Expand Up @@ -25,6 +25,7 @@ use middle::trans::debuginfo;
use middle::trans::monomorphize::MonoId;
use middle::trans::type_::{Type, TypeNames};
use middle::ty;
use util::ppaux::Repr;
use util::sha2::Sha256;
use util::nodemap::{NodeMap, NodeSet, DefIdMap};

Expand Down Expand Up @@ -717,6 +718,16 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
pub fn trait_cache(&self) -> &RefCell<HashMap<Rc<ty::TraitRef>, traits::Vtable<()>>> {
&self.local.trait_cache
}

// Threshold (in bytes) at or above which an object layout is rejected via
// report_overbig_object.  Currently a flat 2 GiB (1<<31) on every target.
pub fn max_obj_size(&self) -> u64 {
    1<<31 /* FIXME: select based on architecture */
}

// Abort compilation with a fatal error naming the offending type `obj`.
// Never returns (sess().fatal diverges).
pub fn report_overbig_object(&self, obj: ty::t) -> ! {
    let msg = format!("Objects of type `{}` are too big for the current ABI",
                      obj.repr(self.tcx()));
    self.sess().fatal(msg.as_slice())
}
}

fn declare_intrinsic(ccx: &CrateContext, key: & &'static str) -> Option<ValueRef> {
Expand Down
66 changes: 44 additions & 22 deletions src/librustc/middle/trans/type_of.rs
Expand Up @@ -24,6 +24,20 @@ use middle::trans::type_::Type;
use syntax::abi;
use syntax::ast;

use std::num::CheckedMul;

// LLVM doesn't like objects that are too big. Issue #17913
// Reject array/vector layouts whose total byte size (element size * length)
// overflows or reaches max_obj_size; `scapegoat` is the type blamed on error.
fn ensure_array_fits_in_address_space(ccx: &CrateContext,
                                      llet: Type,
                                      size: machine::llsize,
                                      scapegoat: ty::t) {
    let elem_size = machine::llsize_of_alloc(ccx, llet);
    let fits = match elem_size.checked_mul(&size) {
        Some(total) => total < ccx.max_obj_size(),
        // Multiplication overflowed: certainly too big.
        None => false,
    };
    if !fits {
        ccx.report_overbig_object(scapegoat)
    }
}

pub fn arg_is_indirect(ccx: &CrateContext, arg_ty: ty::t) -> bool {
!type_is_immediate(ccx, arg_ty)
}
Expand Down Expand Up @@ -186,7 +200,10 @@ pub fn sizing_type_of(cx: &CrateContext, t: ty::t) -> Type {
ty::ty_closure(..) => Type::struct_(cx, [Type::i8p(cx), Type::i8p(cx)], false),

ty::ty_vec(ty, Some(size)) => {
Type::array(&sizing_type_of(cx, ty), size as u64)
let llty = sizing_type_of(cx, ty);
let size = size as u64;
ensure_array_fits_in_address_space(cx, llty, size, t);
Type::array(&llty, size)
}

ty::ty_tup(..) | ty::ty_enum(..) | ty::ty_unboxed_closure(..) => {
Expand All @@ -196,9 +213,10 @@ pub fn sizing_type_of(cx: &CrateContext, t: ty::t) -> Type {

ty::ty_struct(..) => {
if ty::type_is_simd(cx.tcx(), t) {
let et = ty::simd_type(cx.tcx(), t);
let n = ty::simd_size(cx.tcx(), t);
Type::vector(&type_of(cx, et), n as u64)
let llet = type_of(cx, ty::simd_type(cx.tcx(), t));
let n = ty::simd_size(cx.tcx(), t) as u64;
ensure_array_fits_in_address_space(cx, llet, n, t);
Type::vector(&llet, n)
} else {
let repr = adt::represent_type(cx, t);
adt::sizing_type_of(cx, &*repr, false)
Expand Down Expand Up @@ -282,21 +300,21 @@ pub fn type_of(cx: &CrateContext, t: ty::t) -> Type {
ty::ty_uint(t) => Type::uint_from_ty(cx, t),
ty::ty_float(t) => Type::float_from_ty(cx, t),
ty::ty_enum(did, ref substs) => {
// Only create the named struct, but don't fill it in. We
// fill it in *after* placing it into the type cache. This
// avoids creating more than one copy of the enum when one
// of the enum's variants refers to the enum itself.
let repr = adt::represent_type(cx, t);
let tps = substs.types.get_slice(subst::TypeSpace);
let name = llvm_type_name(cx, an_enum, did, tps);
adt::incomplete_type_of(cx, &*repr, name.as_slice())
// Only create the named struct, but don't fill it in. We
// fill it in *after* placing it into the type cache. This
// avoids creating more than one copy of the enum when one
// of the enum's variants refers to the enum itself.
let repr = adt::represent_type(cx, t);
let tps = substs.types.get_slice(subst::TypeSpace);
let name = llvm_type_name(cx, an_enum, did, tps);
adt::incomplete_type_of(cx, &*repr, name.as_slice())
}
ty::ty_unboxed_closure(did, _) => {
// Only create the named struct, but don't fill it in. We
// fill it in *after* placing it into the type cache.
let repr = adt::represent_type(cx, t);
let name = llvm_type_name(cx, an_unboxed_closure, did, []);
adt::incomplete_type_of(cx, &*repr, name.as_slice())
// Only create the named struct, but don't fill it in. We
// fill it in *after* placing it into the type cache.
let repr = adt::represent_type(cx, t);
let name = llvm_type_name(cx, an_unboxed_closure, did, []);
adt::incomplete_type_of(cx, &*repr, name.as_slice())
}

ty::ty_uniq(ty) | ty::ty_rptr(_, ty::mt{ty, ..}) | ty::ty_ptr(ty::mt{ty, ..}) => {
Expand All @@ -315,8 +333,11 @@ pub fn type_of(cx: &CrateContext, t: ty::t) -> Type {
}
}

ty::ty_vec(ty, Some(n)) => {
Type::array(&type_of(cx, ty), n as u64)
ty::ty_vec(ty, Some(size)) => {
let size = size as u64;
let llty = type_of(cx, ty);
ensure_array_fits_in_address_space(cx, llty, size, t);
Type::array(&llty, size)
}
ty::ty_vec(ty, None) => {
type_of(cx, ty)
Expand All @@ -341,9 +362,10 @@ pub fn type_of(cx: &CrateContext, t: ty::t) -> Type {
}
ty::ty_struct(did, ref substs) => {
if ty::type_is_simd(cx.tcx(), t) {
let et = ty::simd_type(cx.tcx(), t);
let n = ty::simd_size(cx.tcx(), t);
Type::vector(&type_of(cx, et), n as u64)
let llet = type_of(cx, ty::simd_type(cx.tcx(), t));
let n = ty::simd_size(cx.tcx(), t) as u64;
ensure_array_fits_in_address_space(cx, llet, n, t);
Type::vector(&llet, n)
} else {
// Only create the named struct, but don't fill it in. We fill it
// in *after* placing it into the type cache. This prevents
Expand Down

0 comments on commit e053dfa

Please sign in to comment.