Skip to content

Commit

Permalink
Allow for space between each filemap in the codemap
Browse files Browse the repository at this point in the history
So if a filemap's last byte is at position n in the codemap, then n+1 will not refer to any filemap, and the next filemap will begin at n+2.

This is useful for empty files: it means that every file (even an empty one) has a byte in the codemap.

Closes #23301, #26504
  • Loading branch information
nrc committed Jul 21, 2015
1 parent 691ce23 commit 007246c
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 78 deletions.
162 changes: 90 additions & 72 deletions src/libsyntax/codemap.rs
Expand Up @@ -115,6 +115,10 @@ impl Sub for CharPos {
/// are *absolute* positions from the beginning of the codemap, not positions
/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
/// to the original source.
/// You must be careful if the span crosses more than one file - you will not be
/// able to use many of the functions on spans in codemap and you cannot assume
/// that the length of the span = hi - lo; there may be space in the BytePos
/// range between files.
#[derive(Clone, Copy, Hash)]
pub struct Span {
pub lo: BytePos,
Expand Down Expand Up @@ -339,7 +343,7 @@ pub struct MultiByteChar {
pub bytes: usize,
}

/// A single source in the CodeMap
/// A single source in the CodeMap.
pub struct FileMap {
/// The name of the file that the source came from, source that doesn't
/// originate from files has names between angle brackets by convention,
Expand Down Expand Up @@ -508,6 +512,9 @@ impl FileMap {
lines.get(line_number).map(|&line| {
let begin: BytePos = line - self.start_pos;
let begin = begin.to_usize();
// We can't use `lines.get(line_number+1)` because we might
// be parsing when we call this function and thus the current
// line is the last one we have line info for.
let slice = &src[begin..];
match slice.find('\n') {
Some(e) => &slice[..e],
Expand Down Expand Up @@ -598,27 +605,27 @@ impl CodeMap {
Ok(self.new_filemap(path.to_str().unwrap().to_string(), src))
}

/// Computes the start position to assign to the next filemap.
///
/// Returns 0 when `self.files` is empty. Otherwise returns the `end_pos` of
/// the last filemap in `self.files`, plus one.
fn next_start_pos(&self) -> usize {
    let files = self.files.borrow();
    // The extra position leaves a gap between consecutive filemaps, so that
    // positions in the codemap stay distinguishable even when a filemap has
    // zero length.
    files.last().map_or(0, |prev| prev.end_pos.to_usize() + 1)
}

/// Creates a new filemap without setting its line information. If you don't
/// intend to set the line information yourself, you should use new_filemap_and_lines.
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
let start_pos = match files.last() {
None => 0,
Some(last) => last.end_pos.to_usize(),
};

// Remove utf-8 BOM if any.
if src.starts_with("\u{feff}") {
src.drain(..3);
}

// Append '\n' in case it's not already there.
// This is a workaround to prevent CodeMap.lookup_filemap_idx from
// accidentally overflowing into the next filemap in case the last byte
// of span is also the last byte of filemap, which leads to incorrect
// results from CodeMap.span_to_*.
if !src.is_empty() && !src.ends_with("\n") {
src.push('\n');
}

let end_pos = start_pos + src.len();

let filemap = Rc::new(FileMap {
Expand All @@ -645,11 +652,8 @@ impl CodeMap {
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>)
-> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
let start_pos = match files.last() {
None => 0,
Some(last) => last.end_pos.to_usize(),
};

let end_pos = Pos::from_usize(start_pos + source_len);
let start_pos = Pos::from_usize(start_pos);
Expand Down Expand Up @@ -686,39 +690,61 @@ impl CodeMap {

/// Lookup source information about a BytePos
pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
let line = a + 1; // Line numbers start at 1
let chpos = self.bytepos_to_file_charpos(pos);
let linebpos = (*f.lines.borrow())[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
debug!("byte pos {:?} is on the line at byte pos {:?}",
pos, linebpos);
debug!("char pos {:?} is on the line at char pos {:?}",
chpos, linechpos);
debug!("byte is on line: {}", line);
assert!(chpos >= linechpos);
Loc {
file: f,
line: line,
col: chpos - linechpos
match self.lookup_line(pos) {
Ok(FileMapAndLine { fm: f, line: a }) => {
let line = a + 1; // Line numbers start at 1
let linebpos = (*f.lines.borrow())[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
debug!("byte pos {:?} is on the line at byte pos {:?}",
pos, linebpos);
debug!("char pos {:?} is on the line at char pos {:?}",
chpos, linechpos);
debug!("byte is on line: {}", line);
assert!(chpos >= linechpos);
Loc {
file: f,
line: line,
col: chpos - linechpos,
}
}
Err(f) => {
Loc {
file: f,
line: 0,
col: chpos,
}
}
}
}

fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
// If the relevant filemap is empty, we don't return a line number.
fn lookup_line(&self, pos: BytePos) -> Result<FileMapAndLine, Rc<FileMap>> {
let idx = self.lookup_filemap_idx(pos);

let files = self.files.borrow();
let f = (*files)[idx].clone();

let len = f.lines.borrow().len();
if len == 0 {
return Err(f);
}

let mut a = 0;
{
let lines = f.lines.borrow();
let mut b = lines.len();
while b - a > 1 {
let m = (a + b) / 2;
if (*lines)[m] > pos { b = m; } else { a = m; }
if (*lines)[m] > pos {
b = m;
} else {
a = m;
}
}
assert!(a <= lines.len());
}
FileMapAndLine {fm: f, line: a}
Ok(FileMapAndLine { fm: f, line: a })
}

pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
Expand Down Expand Up @@ -880,12 +906,15 @@ impl CodeMap {
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes)
}

// Return the index of the filemap (in self.files) which contains pos.
fn lookup_filemap_idx(&self, pos: BytePos) -> usize {
let files = self.files.borrow();
let files = &*files;
let len = files.len();
let count = files.len();

// Binary search for the filemap.
let mut a = 0;
let mut b = len;
let mut b = count;
while b - a > 1 {
let m = (a + b) / 2;
if files[m].start_pos > pos {
Expand All @@ -894,26 +923,8 @@ impl CodeMap {
a = m;
}
}
// There can be filemaps with length 0. These have the same start_pos as
// the previous filemap, but are not the filemaps we want (because they
// are length 0, they cannot contain what we are looking for). So,
// rewind until we find a useful filemap.
loop {
let lines = files[a].lines.borrow();
let lines = lines;
if !lines.is_empty() {
break;
}
if a == 0 {
panic!("position {} does not resolve to a source location",
pos.to_usize());
}
a -= 1;
}
if a >= len {
panic!("position {} does not resolve to a source location",
pos.to_usize())
}

assert!(a < count, "position {} does not resolve to a source location", pos.to_usize());

return a;
}
Expand Down Expand Up @@ -1027,10 +1038,13 @@ mod tests {
let fm = cm.new_filemap("blork.rs".to_string(),
"first line.\nsecond line".to_string());
fm.next_line(BytePos(0));
// Test we can get lines with partial line info.
assert_eq!(fm.get_line(0), Some("first line."));
// TESTING BROKEN BEHAVIOR:
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
fm.next_line(BytePos(10));
assert_eq!(fm.get_line(1), Some("."));
fm.next_line(BytePos(12));
assert_eq!(fm.get_line(2), Some("second line"));
}

#[test]
Expand All @@ -1056,9 +1070,9 @@ mod tests {

fm1.next_line(BytePos(0));
fm1.next_line(BytePos(12));
fm2.next_line(BytePos(24));
fm3.next_line(BytePos(24));
fm3.next_line(BytePos(34));
fm2.next_line(fm2.start_pos);
fm3.next_line(fm3.start_pos);
fm3.next_line(fm3.start_pos + BytePos(12));

cm
}
Expand All @@ -1068,11 +1082,15 @@ mod tests {
// Test lookup_byte_offset
let cm = init_code_map();

let fmabp1 = cm.lookup_byte_offset(BytePos(22));
let fmabp1 = cm.lookup_byte_offset(BytePos(23));
assert_eq!(fmabp1.fm.name, "blork.rs");
assert_eq!(fmabp1.pos, BytePos(22));
assert_eq!(fmabp1.pos, BytePos(23));

let fmabp1 = cm.lookup_byte_offset(BytePos(24));
assert_eq!(fmabp1.fm.name, "empty.rs");
assert_eq!(fmabp1.pos, BytePos(0));

let fmabp2 = cm.lookup_byte_offset(BytePos(24));
let fmabp2 = cm.lookup_byte_offset(BytePos(25));
assert_eq!(fmabp2.fm.name, "blork2.rs");
assert_eq!(fmabp2.pos, BytePos(0));
}
Expand All @@ -1085,7 +1103,7 @@ mod tests {
let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
assert_eq!(cp1, CharPos(22));

let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
let cp2 = cm.bytepos_to_file_charpos(BytePos(25));
assert_eq!(cp2, CharPos(0));
}

Expand All @@ -1099,7 +1117,7 @@ mod tests {
assert_eq!(loc1.line, 2);
assert_eq!(loc1.col, CharPos(10));

let loc2 = cm.lookup_char_pos(BytePos(24));
let loc2 = cm.lookup_char_pos(BytePos(25));
assert_eq!(loc2.file.name, "blork2.rs");
assert_eq!(loc2.line, 1);
assert_eq!(loc2.col, CharPos(0));
Expand All @@ -1115,18 +1133,18 @@ mod tests {
"first line€€.\n€ second line".to_string());

fm1.next_line(BytePos(0));
fm1.next_line(BytePos(22));
fm2.next_line(BytePos(40));
fm2.next_line(BytePos(58));
fm1.next_line(BytePos(28));
fm2.next_line(fm2.start_pos);
fm2.next_line(fm2.start_pos + BytePos(20));

fm1.record_multibyte_char(BytePos(3), 3);
fm1.record_multibyte_char(BytePos(9), 3);
fm1.record_multibyte_char(BytePos(12), 3);
fm1.record_multibyte_char(BytePos(15), 3);
fm1.record_multibyte_char(BytePos(18), 3);
fm2.record_multibyte_char(BytePos(50), 3);
fm2.record_multibyte_char(BytePos(53), 3);
fm2.record_multibyte_char(BytePos(58), 3);
fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);

cm
}
Expand Down
13 changes: 7 additions & 6 deletions src/libsyntax/diagnostic.rs
Expand Up @@ -854,11 +854,12 @@ mod test {
println!("done");
let vec = data.lock().unwrap().clone();
let vec: &[u8] = &vec;
println!("{}", from_utf8(vec).unwrap());
assert_eq!(vec, "dummy.txt: 8 \n\
dummy.txt: 9 \n\
dummy.txt:10 \n\
dummy.txt:11 \n\
dummy.txt:12 \n".as_bytes());
let str = from_utf8(vec).unwrap();
println!("{}", str);
assert_eq!(str, "dummy.txt: 8 line8\n\
dummy.txt: 9 line9\n\
dummy.txt:10 line10\n\
dummy.txt:11 e-lä-vän\n\
dummy.txt:12 tolv\n");
}
}

0 comments on commit 007246c

Please sign in to comment.