Skip to content

Commit

Permalink
Auto merge of #22235 - michaelwoerister:cross-crate-spans, r=michaelw…
Browse files Browse the repository at this point in the history
…oerister

This allows to create proper debuginfo line information for items inlined from other crates (e.g. instantiations of generics). Only the codemap's 'metadata' is stored in a crate's metadata. That is, just filename, positions of line-beginnings, etc. but not the actual source code itself.

Crate metadata size is increased by this change because spans in the encoded ASTs take up space now:
```
                BEFORE    AFTER
libcore         36 MiB    39.6 MiB    +10%
libsyntax       51.1 MiB  60.5 MiB    +18.4%
libcollections  11.2 MiB  12.8 MiB    +14.3%
```
This only affects binaries containing metadata (rlibs and dylibs), executables should not be affected in size. 

Fixes #19228 and probably #22226.
  • Loading branch information
bors committed Mar 4, 2015
2 parents bdf6e4f + 2f88655 commit 3b3bb0e
Show file tree
Hide file tree
Showing 12 changed files with 693 additions and 172 deletions.
3 changes: 3 additions & 0 deletions src/librustc/metadata/common.rs
Expand Up @@ -252,3 +252,6 @@ pub const tag_macro_def: uint = 0x9e;
pub const tag_macro_def_body: uint = 0x9f;

pub const tag_paren_sugar: uint = 0xa0;

pub const tag_codemap: uint = 0xa1;
pub const tag_codemap_filemap: uint = 0xa2;
136 changes: 133 additions & 3 deletions src/librustc/metadata/creader.rs
Expand Up @@ -26,7 +26,7 @@ use syntax::ast;
use syntax::abi;
use syntax::attr;
use syntax::attr::AttrMetaMethods;
use syntax::codemap::{Span, mk_sp};
use syntax::codemap::{self, Span, mk_sp, Pos};
use syntax::parse;
use syntax::parse::token::InternedString;
use syntax::parse::token;
Expand Down Expand Up @@ -373,15 +373,17 @@ impl<'a> CrateReader<'a> {
// Maintain a reference to the top most crate.
let root = if root.is_some() { root } else { &crate_paths };

let cnum_map = self.resolve_crate_deps(root, lib.metadata.as_slice(), span);
let loader::Library { dylib, rlib, metadata } = lib;

let loader::Library{ dylib, rlib, metadata } = lib;
let cnum_map = self.resolve_crate_deps(root, metadata.as_slice(), span);
let codemap_import_info = import_codemap(self.sess.codemap(), &metadata);

let cmeta = Rc::new( cstore::crate_metadata {
name: name.to_string(),
data: metadata,
cnum_map: cnum_map,
cnum: cnum,
codemap_import_info: codemap_import_info,
span: span,
});

Expand Down Expand Up @@ -586,3 +588,131 @@ impl<'a> CrateReader<'a> {
}
}
}

/// Imports the codemap from an external crate into the codemap of the crate
/// currently being compiled (the "local crate").
///
/// The import algorithm works analogous to how AST items are inlined from an
/// external crate's metadata:
/// For every FileMap in the external codemap an 'inline' copy is created in the
/// local codemap. The correspondence relation between external and local
/// FileMaps is recorded in the `ImportedFileMap` objects returned from this
/// function. When an item from an external crate is later inlined into this
/// crate, this correspondence information is used to translate the span
/// information of the inlined item so that it refers the correct positions in
/// the local codemap (see `astencode::DecodeContext::tr_span()`).
///
/// The import algorithm in the function below will reuse FileMaps already
/// existing in the local codemap. For example, even if the FileMap of some
/// source file of libstd gets imported many times, there will only ever be
/// one FileMap object for the corresponding file in the local codemap.
///
/// Note that imported FileMaps do not actually contain the source code of the
/// file they represent, just information about length, line breaks, and
/// multibyte characters. This information is enough to generate valid debuginfo
/// for items inlined from other crates.
fn import_codemap(local_codemap: &codemap::CodeMap,
metadata: &MetadataBlob)
-> Vec<cstore::ImportedFileMap> {
let external_codemap = decoder::get_imported_filemaps(metadata.as_slice());

let imported_filemaps = external_codemap.into_iter().map(|filemap_to_import| {
// Try to find an existing FileMap that can be reused for the filemap to
// be imported. A FileMap is reusable if it is exactly the same, just
// positioned at a different offset within the codemap.
let reusable_filemap = {
local_codemap.files
.borrow()
.iter()
.find(|fm| are_equal_modulo_startpos(&fm, &filemap_to_import))
.map(|rc| rc.clone())
};

match reusable_filemap {
Some(fm) => {
cstore::ImportedFileMap {
original_start_pos: filemap_to_import.start_pos,
original_end_pos: filemap_to_import.end_pos,
translated_filemap: fm
}
}
None => {
// We can't reuse an existing FileMap, so allocate a new one
// containing the information we need.
let codemap::FileMap {
name,
start_pos,
end_pos,
lines,
multibyte_chars,
..
} = filemap_to_import;

let source_length = (end_pos - start_pos).to_usize();

// Translate line-start positions and multibyte character
// position into frame of reference local to file.
// `CodeMap::new_imported_filemap()` will then translate those
// coordinates to their new global frame of reference when the
// offset of the FileMap is known.
let lines = lines.into_inner().map_in_place(|pos| pos - start_pos);
let multibyte_chars = multibyte_chars
.into_inner()
.map_in_place(|mbc|
codemap::MultiByteChar {
pos: mbc.pos + start_pos,
bytes: mbc.bytes
});

let local_version = local_codemap.new_imported_filemap(name,
source_length,
lines,
multibyte_chars);
cstore::ImportedFileMap {
original_start_pos: start_pos,
original_end_pos: end_pos,
translated_filemap: local_version
}
}
}
}).collect();

return imported_filemaps;

fn are_equal_modulo_startpos(fm1: &codemap::FileMap,
fm2: &codemap::FileMap)
-> bool {
if fm1.name != fm2.name {
return false;
}

let lines1 = fm1.lines.borrow();
let lines2 = fm2.lines.borrow();

if lines1.len() != lines2.len() {
return false;
}

for (&line1, &line2) in lines1.iter().zip(lines2.iter()) {
if (line1 - fm1.start_pos) != (line2 - fm2.start_pos) {
return false;
}
}

let multibytes1 = fm1.multibyte_chars.borrow();
let multibytes2 = fm2.multibyte_chars.borrow();

if multibytes1.len() != multibytes2.len() {
return false;
}

for (mb1, mb2) in multibytes1.iter().zip(multibytes2.iter()) {
if (mb1.bytes != mb2.bytes) ||
((mb1.pos - fm1.start_pos) != (mb2.pos - fm2.start_pos)) {
return false;
}
}

true
}
}
16 changes: 14 additions & 2 deletions src/librustc/metadata/cstore.rs
Expand Up @@ -27,7 +27,7 @@ use std::cell::RefCell;
use std::rc::Rc;
use flate::Bytes;
use syntax::ast;
use syntax::codemap::Span;
use syntax::codemap;
use syntax::parse::token::IdentInterner;

// A map from external crate numbers (as decoded from some crate file) to
Expand All @@ -41,12 +41,24 @@ pub enum MetadataBlob {
MetadataArchive(loader::ArchiveMetadata),
}

/// Holds information about a codemap::FileMap imported from another crate.
/// See creader::import_codemap() for more information.
pub struct ImportedFileMap {
/// This FileMap's byte-offset within the codemap of its original crate
pub original_start_pos: codemap::BytePos,
/// The end of this FileMap within the codemap of its original crate
pub original_end_pos: codemap::BytePos,
/// The imported FileMap's representation within the local codemap
pub translated_filemap: Rc<codemap::FileMap>
}

pub struct crate_metadata {
pub name: String,
pub data: MetadataBlob,
pub cnum_map: cnum_map,
pub cnum: ast::CrateNum,
pub span: Span,
pub codemap_import_info: Vec<ImportedFileMap>,
pub span: codemap::Span,
}

#[derive(Copy, Debug, PartialEq, Clone)]
Expand Down
17 changes: 16 additions & 1 deletion src/librustc/metadata/decoder.rs
Expand Up @@ -1561,11 +1561,26 @@ pub fn is_associated_type(cdata: Cmd, id: ast::NodeId) -> bool {
}
}


pub fn is_default_trait<'tcx>(cdata: Cmd, id: ast::NodeId) -> bool {
let item_doc = lookup_item(id, cdata.data());
match item_family(item_doc) {
Family::DefaultImpl => true,
_ => false
}
}

pub fn get_imported_filemaps(metadata: &[u8]) -> Vec<codemap::FileMap> {
let crate_doc = rbml::Doc::new(metadata);
let cm_doc = reader::get_doc(crate_doc, tag_codemap);

let mut filemaps = vec![];

reader::tagged_docs(cm_doc, tag_codemap_filemap, |filemap_doc| {
let mut decoder = reader::Decoder::new(filemap_doc);
let filemap: codemap::FileMap = Decodable::decode(&mut decoder).unwrap();
filemaps.push(filemap);
true
});

return filemaps;
}
30 changes: 30 additions & 0 deletions src/librustc/metadata/encoder.rs
Expand Up @@ -1751,6 +1751,28 @@ fn encode_plugin_registrar_fn(ecx: &EncodeContext, rbml_w: &mut Encoder) {
}
}

fn encode_codemap(ecx: &EncodeContext, rbml_w: &mut Encoder) {
rbml_w.start_tag(tag_codemap);
let codemap = ecx.tcx.sess.codemap();

for filemap in &codemap.files.borrow()[..] {

if filemap.lines.borrow().len() == 0 || filemap.is_imported() {
// No need to export empty filemaps, as they can't contain spans
// that need translation.
// Also no need to re-export imported filemaps, as any downstream
// crate will import them from their original source.
continue;
}

rbml_w.start_tag(tag_codemap_filemap);
filemap.encode(rbml_w);
rbml_w.end_tag();
}

rbml_w.end_tag();
}

/// Serialize the text of the exported macros
fn encode_macro_defs(rbml_w: &mut Encoder,
krate: &ast::Crate) {
Expand Down Expand Up @@ -1968,6 +1990,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
lang_item_bytes: u64,
native_lib_bytes: u64,
plugin_registrar_fn_bytes: u64,
codemap_bytes: u64,
macro_defs_bytes: u64,
impl_bytes: u64,
misc_bytes: u64,
Expand All @@ -1982,6 +2005,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
lang_item_bytes: 0,
native_lib_bytes: 0,
plugin_registrar_fn_bytes: 0,
codemap_bytes: 0,
macro_defs_bytes: 0,
impl_bytes: 0,
misc_bytes: 0,
Expand Down Expand Up @@ -2047,6 +2071,11 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
encode_plugin_registrar_fn(&ecx, &mut rbml_w);
stats.plugin_registrar_fn_bytes = rbml_w.writer.tell().unwrap() - i;

// Encode codemap
i = rbml_w.writer.tell().unwrap();
encode_codemap(&ecx, &mut rbml_w);
stats.codemap_bytes = rbml_w.writer.tell().unwrap() - i;

// Encode macro definitions
i = rbml_w.writer.tell().unwrap();
encode_macro_defs(&mut rbml_w, krate);
Expand Down Expand Up @@ -2091,6 +2120,7 @@ fn encode_metadata_inner(wr: &mut SeekableMemWriter,
println!(" lang item bytes: {}", stats.lang_item_bytes);
println!(" native bytes: {}", stats.native_lib_bytes);
println!("plugin registrar bytes: {}", stats.plugin_registrar_fn_bytes);
println!(" codemap bytes: {}", stats.codemap_bytes);
println!(" macro def bytes: {}", stats.macro_defs_bytes);
println!(" impl bytes: {}", stats.impl_bytes);
println!(" misc bytes: {}", stats.misc_bytes);
Expand Down
53 changes: 49 additions & 4 deletions src/librustc/middle/astencode.rs
Expand Up @@ -42,6 +42,7 @@ use syntax;
use std::old_io::Seek;
use std::num::FromPrimitive;
use std::rc::Rc;
use std::cell::Cell;

use rbml::reader;
use rbml::writer::Encoder;
Expand All @@ -58,7 +59,9 @@ struct DecodeContext<'a, 'b, 'tcx: 'a> {
tcx: &'a ty::ctxt<'tcx>,
cdata: &'b cstore::crate_metadata,
from_id_range: ast_util::IdRange,
to_id_range: ast_util::IdRange
to_id_range: ast_util::IdRange,
// Cache the last used filemap for translating spans as an optimization.
last_filemap_index: Cell<usize>,
}

trait tr {
Expand Down Expand Up @@ -120,6 +123,8 @@ impl<'a, 'b, 'c, 'tcx> ast_map::FoldOps for &'a DecodeContext<'b, 'c, 'tcx> {
}
}

/// Decodes an item from its AST in the cdata's metadata and adds it to the
/// ast-map.
pub fn decode_inlined_item<'tcx>(cdata: &cstore::crate_metadata,
tcx: &ty::ctxt<'tcx>,
path: Vec<ast_map::PathElem>,
Expand All @@ -143,7 +148,8 @@ pub fn decode_inlined_item<'tcx>(cdata: &cstore::crate_metadata,
cdata: cdata,
tcx: tcx,
from_id_range: from_id_range,
to_id_range: to_id_range
to_id_range: to_id_range,
last_filemap_index: Cell::new(0)
};
let raw_ii = decode_ast(ast_doc);
let ii = ast_map::map_decoded_item(&dcx.tcx.map, path, raw_ii, dcx);
Expand Down Expand Up @@ -234,8 +240,47 @@ impl<'a, 'b, 'tcx> DecodeContext<'a, 'b, 'tcx> {
assert_eq!(did.krate, ast::LOCAL_CRATE);
ast::DefId { krate: ast::LOCAL_CRATE, node: self.tr_id(did.node) }
}
pub fn tr_span(&self, _span: Span) -> Span {
codemap::DUMMY_SP // FIXME (#1972): handle span properly

/// Translates a `Span` from an extern crate to the corresponding `Span`
/// within the local crate's codemap. `creader::import_codemap()` will
/// already have allocated any additionally needed FileMaps in the local
/// codemap as a side-effect of creating the crate_metadata's
/// `codemap_import_info`.
pub fn tr_span(&self, span: Span) -> Span {
let imported_filemaps = &self.cdata.codemap_import_info[..];

let filemap_index = {
// Optimize for the case that most spans within a translated item
// originate from the same filemap.
let last_filemap_index = self.last_filemap_index.get();

if span.lo >= imported_filemaps[last_filemap_index].original_start_pos &&
span.hi <= imported_filemaps[last_filemap_index].original_end_pos {
last_filemap_index
} else {
let mut a = 0;
let mut b = imported_filemaps.len();

while b - a > 1 {
let m = (a + b) / 2;
if imported_filemaps[m].original_start_pos > span.lo {
b = m;
} else {
a = m;
}
}

self.last_filemap_index.set(a);
a
}
};

let lo = (span.lo - imported_filemaps[filemap_index].original_start_pos) +
imported_filemaps[filemap_index].translated_filemap.start_pos;
let hi = (span.hi - imported_filemaps[filemap_index].original_start_pos) +
imported_filemaps[filemap_index].translated_filemap.start_pos;

codemap::mk_sp(lo, hi)
}
}

Expand Down
6 changes: 5 additions & 1 deletion src/librustc_driver/pretty.rs
Expand Up @@ -542,7 +542,11 @@ pub fn pretty_print_input(sess: Session,

let src_name = driver::source_name(input);
let src = sess.codemap().get_filemap(&src_name[..])
.src.as_bytes().to_vec();
.src
.as_ref()
.unwrap()
.as_bytes()
.to_vec();
let mut rdr = MemReader::new(src);

let out = match ofile {
Expand Down

0 comments on commit 3b3bb0e

Please sign in to comment.