Skip to content

Commit

Permalink
Replaced multi-byte character handling in end_point with potentially …
Browse files Browse the repository at this point in the history
…more performant variant.
  • Loading branch information
davidtwco committed Jan 27, 2018
1 parent c71cec8 commit 6235647
Showing 1 changed file with 59 additions and 23 deletions.
82 changes: 59 additions & 23 deletions src/libsyntax/codemap.rs
Expand Up @@ -610,38 +610,74 @@ impl CodeMap {

/// Returns a new span representing just the end-point of this span
pub fn end_point(&self, sp: Span) -> Span {
let hi = sp.hi().0.checked_sub(1).unwrap_or(sp.hi().0);
let hi = self.get_start_of_char_bytepos(BytePos(hi));
let lo = cmp::max(hi.0, sp.lo().0);
sp.with_lo(BytePos(lo))
let pos = sp.hi().0;

let width = self.find_width_of_character_at_span(sp, false);
let corrected_end_position = pos.checked_sub(width).unwrap_or(pos);

let end_point = BytePos(cmp::max(corrected_end_position, sp.lo().0));
sp.with_lo(end_point)
}

/// Returns a new span representing the next character after the end-point of this span
pub fn next_point(&self, sp: Span) -> Span {
let hi = sp.lo().0.checked_add(1).unwrap_or(sp.lo().0);
let hi = self.get_start_of_char_bytepos(BytePos(hi));
let lo = cmp::max(sp.hi().0, hi.0);
Span::new(BytePos(lo), BytePos(lo), sp.ctxt())
let pos = sp.lo().0;

let width = self.find_width_of_character_at_span(sp, true);
let corrected_next_position = pos.checked_add(width).unwrap_or(pos);

let next_point = BytePos(cmp::max(sp.hi().0, corrected_next_position));
Span::new(next_point, next_point, sp.ctxt())
}

fn get_start_of_char_bytepos(&self, bpos: BytePos) -> BytePos {
let idx = self.lookup_filemap_idx(bpos);
let files = self.files.borrow();
let map = &(*files)[idx];
/// Finds the width of a character, either before or after the provided span.
fn find_width_of_character_at_span(&self, sp: Span, forwards: bool) -> u32 {
// Disregard malformed spans and assume a one-byte wide character.
if sp.lo() > sp.hi() {
return 1;
}

for mbc in map.multibyte_chars.borrow().iter() {
if mbc.pos < bpos {
if bpos.to_usize() >= mbc.pos.to_usize() + mbc.bytes {
// If we do, then return the start of the character.
return mbc.pos;
}
} else {
break;
}
let local_begin = self.lookup_byte_offset(sp.lo());
let local_end = self.lookup_byte_offset(sp.hi());

let start_index = local_begin.pos.to_usize();
let end_index = local_end.pos.to_usize();

// Disregard indexes that are at the start or end of their spans, they can't fit bigger
// characters.
if (!forwards && end_index == usize::min_value()) ||
(forwards && start_index == usize::max_value()) {
return 1;
}

let source_len = (local_begin.fm.end_pos - local_begin.fm.start_pos).to_usize();
// Ensure indexes are also not malformed.
if start_index > end_index || end_index > source_len {
return 1;
}

// If this isn't a multibyte character, return the original position.
return bpos;
// We need to extend the snippet to the end of the src rather than to end_index so when
// searching forwards for boundaries we've got somewhere to search.
let snippet = if let Some(ref src) = local_begin.fm.src {
let len = src.len();
(&src[start_index..len]).to_string()
} else if let Some(src) = local_begin.fm.external_src.borrow().get_source() {
let len = src.len();
(&src[start_index..len]).to_string()
} else {
return 1;
};

let mut target = if forwards { end_index + 1 } else { end_index - 1 };
while !snippet.is_char_boundary(target - start_index) {
target = if forwards { target + 1 } else { target - 1 };
}

if forwards {
(target - end_index) as u32
} else {
(end_index - target) as u32
}
}

pub fn get_filemap(&self, filename: &FileName) -> Option<Rc<FileMap>> {
Expand Down

0 comments on commit 6235647

Please sign in to comment.