Skip to content

Commit

Permalink
Merge pull request pulldown-cmark#222 from marcusklaas/offset-iter
Browse files Browse the repository at this point in the history
Implement an event iterator that includes source offsets
  • Loading branch information
marcusklaas committed Mar 17, 2019
2 parents b1e2395 + 85b17bd commit e360e92
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 40 deletions.
2 changes: 1 addition & 1 deletion src/lib.rs
Expand Up @@ -42,5 +42,5 @@ mod tree;
mod linklabel;
mod strings;

pub use crate::parse::{Parser, Alignment, Event, Tag, Options, LinkType};
pub use crate::parse::{Parser, OffsetIter, Alignment, Event, Tag, Options, LinkType};
pub use crate::strings::{CowStr, InlineStr};
119 changes: 80 additions & 39 deletions src/parse.rs
Expand Up @@ -21,6 +21,7 @@
//! Tree-based two pass parser.

use std::collections::HashMap;
use std::ops::Range;

use unicase::UniCase;

Expand Down Expand Up @@ -2552,6 +2553,67 @@ impl<'a> Parser<'a> {
}
stack.pop_to(&mut self.tree, 0);
}

/// Advances the pre-order walk over the tree by one step.
///
/// Yields the event for that step paired with the `start..end` source
/// offsets of the item that produced it, or `None` once the cursor is
/// exhausted and there is no enclosing container left to close.
fn next_event(&mut self) -> Option<(Event<'a>, Range<usize>)> {
    let first_ix = match self.tree.cur() {
        TreePointer::Valid(ix) => ix,
        TreePointer::Nil => {
            // Nothing left at this level: pop back to the parent and
            // emit its `End` event. `pop` returning `None` ends iteration.
            let parent_ix = self.tree.pop()?;
            let tag = item_to_tag(&self.tree[parent_ix].item).unwrap();
            self.offset = self.tree[parent_ix].item.end;
            self.tree.next_sibling();
            let closed = &self.tree[parent_ix].item;
            return Some((Event::End(tag), closed.start..closed.end));
        }
    };

    // A backslash item is stepped over; the inline check below then
    // looks at its successor when there is one.
    let inspect_ix = match self.tree[first_ix].item.body {
        ItemBody::Backslash => match self.tree.next_sibling() {
            TreePointer::Valid(after) => after,
            _ => first_ix,
        },
        _ => first_ix,
    };
    if self.tree[inspect_ix].item.body.is_inline() {
        self.handle_inline();
    }

    // The cursor is read again here rather than reusing the index from
    // above, because the steps above may have moved it.
    let cur_ix = match self.tree.cur() {
        TreePointer::Valid(ix) => ix,
        _ => return None,
    };

    match item_to_tag(&self.tree[cur_ix].item) {
        Some(tag) => {
            // Container item: descend into it. The parser offset becomes
            // the first child's start, or the item's end if it has no child.
            self.offset = match self.tree[cur_ix].child {
                TreePointer::Valid(child_ix) => self.tree[child_ix].item.start,
                _ => self.tree[cur_ix].item.end,
            };
            self.tree.push();
            let opened = &self.tree[cur_ix].item;
            Some((Event::Start(tag), opened.start..opened.end))
        }
        None => {
            // Item without a tag: move on to its sibling and emit the
            // item's own event.
            self.tree.next_sibling();
            let leaf = &self.tree[cur_ix].item;
            self.offset = leaf.end;
            Some((item_to_event(leaf, self.text), leaf.start..leaf.end))
        }
    }
}

/// Consumes the parser and wraps it in an `OffsetIter`, which yields each
/// event together with the source range it was produced from.
pub fn into_offset_iter(self) -> OffsetIter<'a> {
    let inner = self;
    OffsetIter { inner }
}
}

/// Iterator over `(Event, Range<usize>)` pairs: every parser event together
/// with the start and end offset of its item in the source.
///
/// Obtained by calling `Parser::into_offset_iter`.
pub struct OffsetIter<'a> {
// The wrapped parser; each call to `next` forwards to its `next_event`.
inner: Parser<'a>,
}

impl<'a> Iterator for OffsetIter<'a> {
    type Item = (Event<'a>, Range<usize>);

    /// Forwards to the wrapped parser's `next_event`, keeping the source
    /// range that the plain `Parser` iterator throws away.
    fn next(&mut self) -> Option<Self::Item> {
        let parser = &mut self.inner;
        parser.next_event()
    }
}

fn item_to_tag<'a>(item: &Item<'a>) -> Option<Tag<'a>> {
Expand Down Expand Up @@ -2651,46 +2713,8 @@ fn surgerize_tight_list<'a>(tree : &mut Tree<Item<'a>>) {
impl<'a> Iterator for Parser<'a> {
type Item = Event<'a>;

// TODO: this should probably be destructive. actually remove items from the tree
// so we don't have to clone owned items (Strings)
fn next(&mut self) -> Option<Event<'a>> {
match self.tree.cur() {
TreePointer::Nil => {
let ix = self.tree.pop()?;
let tag = item_to_tag(&self.tree[ix].item).unwrap();
self.offset = self.tree[ix].item.end;
self.tree.next_sibling();
return Some(Event::End(tag));
}
TreePointer::Valid(mut cur_ix) => {
if let ItemBody::Backslash = self.tree[cur_ix].item.body {
if let TreePointer::Valid(next) = self.tree.next_sibling() {
cur_ix = next;
}
}
if self.tree[cur_ix].item.body.is_inline() {
self.handle_inline();
}
}
}

if let TreePointer::Valid(cur_ix) = self.tree.cur() {
if let Some(tag) = item_to_tag(&self.tree[cur_ix].item) {
self.offset = if let TreePointer::Valid(child_ix) = self.tree[cur_ix].child {
self.tree[child_ix].item.start
} else {
self.tree[cur_ix].item.end
};
self.tree.push();
Some(Event::Start(tag))
} else {
self.tree.next_sibling();
self.offset = self.tree[cur_ix].item.end;
Some(item_to_event(&self.tree[cur_ix].item, self.text))
}
} else {
None
}
self.next_event().map(|(ev, _range)| ev)
}
}

Expand All @@ -2712,6 +2736,23 @@ mod test {
assert_eq!(3, Parser::new("<").count());
}

#[test]
fn offset_iter() {
    // Collect only the source ranges, in the order the events are emitted.
    let mut event_offsets = Vec::new();
    for (_ev, range) in Parser::new("*hello* world").into_offset_iter() {
        event_offsets.push(range);
    }

    // Six events are expected for this input, one range per event.
    let expected_offsets = vec![0..13, 0..7, 1..6, 0..7, 7..13, 0..13];
    assert_eq!(expected_offsets, event_offsets);
}

#[test]
fn link_def_at_eof() {
let test_str = "[My site][world]\n\n[world]: https://vincentprouillet.com";
Expand Down

0 comments on commit e360e92

Please sign in to comment.