Skip to content

Commit d320a8e

Browse files
authored
tokenizer: Source positions are always at char boundaries, so avoid overhead when slicing. (#356)
These show up in profiles: https://share.firefox.dev/45Gh2s7
1 parent de00131 commit d320a8e

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

src/tokenizer.rs

+13 -10
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,7 @@ impl<'a> Tokenizer<'a> {
269269

270270
#[inline]
271271
pub fn position(&self) -> SourcePosition {
272+
debug_assert!(self.input.is_char_boundary(self.position));
272273
SourcePosition(self.position)
273274
}
274275

@@ -308,24 +309,26 @@ impl<'a> Tokenizer<'a> {
308309
}
309310

310311
#[inline]
311-
pub fn slice_from(&self, start_pos: SourcePosition) -> &'a str {
312-
&self.input[start_pos.0..self.position]
312+
pub(crate) fn slice_from(&self, start_pos: SourcePosition) -> &'a str {
313+
self.slice(start_pos..self.position())
313314
}
314315

315316
#[inline]
316-
pub fn slice(&self, range: Range<SourcePosition>) -> &'a str {
317-
&self.input[range.start.0..range.end.0]
317+
pub(crate) fn slice(&self, range: Range<SourcePosition>) -> &'a str {
318+
debug_assert!(self.input.is_char_boundary(range.start.0));
319+
debug_assert!(self.input.is_char_boundary(range.end.0));
320+
unsafe { self.input.get_unchecked(range.start.0..range.end.0) }
318321
}
319322

320323
pub fn current_source_line(&self) -> &'a str {
321-
let current = self.position;
322-
let start = self.input[0..current]
324+
let current = self.position();
325+
let start = self.slice(SourcePosition(0)..current)
323326
.rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
324327
.map_or(0, |start| start + 1);
325-
let end = self.input[current..]
328+
let end = self.slice(current..SourcePosition(self.input.len()))
326329
.find(|c| matches!(c, '\r' | '\n' | '\x0C'))
327-
.map_or(self.input.len(), |end| current + end);
328-
&self.input[start..end]
330+
.map_or(self.input.len(), |end| current.0 + end);
331+
self.slice(SourcePosition(start)..SourcePosition(end))
329332
}
330333

331334
#[inline]
@@ -421,7 +424,7 @@ impl<'a> Tokenizer<'a> {
421424

422425
#[inline]
423426
fn next_char(&self) -> char {
424-
self.input[self.position..].chars().next().unwrap()
427+
unsafe { self.input.get_unchecked(self.position().0..) }.chars().next().unwrap()
425428
}
426429

427430
// Given that a newline has been seen, advance over the newline

0 commit comments

Comments
 (0)