Skip to content

Commit

Permalink
Allow line and block comments. (#512)
Browse files Browse the repository at this point in the history
  • Loading branch information
alan-signal committed Jul 2, 2021
1 parent fb107e6 commit aee72b8
Show file tree
Hide file tree
Showing 2 changed files with 289 additions and 10 deletions.
272 changes: 272 additions & 0 deletions hex-literal/src/comments.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
//! Provides an Iterator<Item=u8> decorator that uses a finite state machine to exclude comments
//! from a string in linear time and constant space.

use std::iter::Peekable;

/// Extension trait giving every owned byte iterator an `exclude_comments` adaptor method.
pub(crate) trait Exclude: Iterator<Item = u8> + Sized {
    /// Consumes the iterator and wraps it in an [`ExcludingComments`] adaptor.
    fn exclude_comments(self) -> ExcludingComments<Self>;
}

/// Blanket implementation: any iterator yielding `u8` can call `exclude_comments`.
impl<T> Exclude for T
where
    T: Iterator<Item = u8>,
{
    /// Hands `self` to the `ExcludingComments` constructor.
    fn exclude_comments(self) -> ExcludingComments<Self> {
        ExcludingComments::new_from_iter(self)
    }
}

/// Iterator adaptor that yields the bytes of the wrapped iterator with `//` line comments and
/// `/* */` block comments removed.
///
/// The newline that terminates a line comment is kept in the output. A `/` that does not start
/// a comment is passed through unchanged, including a `/` that is the very last byte.
pub(crate) struct ExcludingComments<I: Iterator<Item = u8>> {
    /// Current state of the comment removal machine.
    state: State,
    /// Source bytes; peekable so the byte after a `/` can be inspected without consuming it.
    iter: Peekable<I>,
}

impl<I: Iterator<Item = u8>> Iterator for ExcludingComments<I> {
    type Item = u8;

    /// Returns the next byte that is not part of a comment.
    ///
    /// # Panics
    ///
    /// Panics if the input ends while a block comment is still open.
    fn next(&mut self) -> Option<Self::Item> {
        let next_byte = self.next_byte();
        if next_byte.is_none() {
            match self.state {
                State::BlockComment | State::PotentiallyLeavingBlockComment => {
                    panic!("block comment not terminated with */")
                }
                State::Normal | State::PotentialComment { .. } | State::LineComment => {}
            }
        }
        next_byte
    }
}

/// States of the comment removal machine.
///
/// ```text
/// Normal
///     '/'   -> PotentialComment
/// PotentialComment
///     '/'   -> LineComment
///     '*'   -> BlockComment
///     other -> Normal (also at end of input; the pending '/' is emitted)
/// LineComment
///     '\n'  -> Normal (the newline is emitted)
/// BlockComment
///     '*'   -> PotentiallyLeavingBlockComment
/// PotentiallyLeavingBlockComment
///     '/'   -> Normal
///     '*'   -> PotentiallyLeavingBlockComment
///     other -> BlockComment
/// ```
enum State {
    /// Outside of any comment; bytes are passed through.
    Normal,
    /// A `/` has been consumed but not emitted yet; `previous` holds that byte.
    PotentialComment { previous: u8 },
    /// Inside a `//` comment; bytes are dropped up to the next newline.
    LineComment,
    /// Inside a `/* */` comment; bytes are dropped.
    BlockComment,
    /// Inside a block comment, directly after a `*` that may be followed by the closing `/`.
    PotentiallyLeavingBlockComment,
}

impl<I: Iterator<Item = u8>> ExcludingComments<I> {
    /// Wraps `iter`, starting in the `Normal` state.
    fn new_from_iter(iter: I) -> Self {
        Self {
            state: State::Normal,
            iter: iter.peekable(),
        }
    }

    /// Runs the state machine until it produces a byte or the input is exhausted.
    ///
    /// Returns `None` once the underlying iterator is exhausted. `self.state` is left untouched
    /// in that case, so the caller can tell whether the input ended inside a block comment.
    fn next_byte(&mut self) -> Option<u8> {
        loop {
            match self.state {
                State::Normal => match self.iter.next()? {
                    b'/' => self.state = State::PotentialComment { previous: b'/' },
                    other => return Some(other),
                },
                State::PotentialComment { previous } => match self.iter.peek().copied() {
                    Some(b'/') => {
                        // second '/': consume it and enter the line comment
                        self.iter.next();
                        self.state = State::LineComment;
                    }
                    Some(b'*') => {
                        // consume the '*' and enter the block comment
                        self.iter.next();
                        self.state = State::BlockComment;
                    }
                    _ => {
                        // Not a comment after all: emit the pending '/' and leave the peeked
                        // byte (if any) for the next call. End of input is handled here too,
                        // so that a trailing '/' is not silently dropped.
                        self.state = State::Normal;
                        return Some(previous);
                    }
                },
                State::LineComment => {
                    // everything up to the newline is dropped; the newline itself is kept
                    if self.iter.next()? == b'\n' {
                        self.state = State::Normal;
                        return Some(b'\n');
                    }
                }
                State::BlockComment => {
                    if self.iter.next()? == b'*' {
                        self.state = State::PotentiallyLeavingBlockComment;
                    }
                }
                State::PotentiallyLeavingBlockComment => {
                    self.state = match self.iter.next()? {
                        b'/' => State::Normal,
                        // Another '*' may still be followed by '/', so stay in this state;
                        // otherwise a comment closed by "**/" would never terminate.
                        b'*' => State::PotentiallyLeavingBlockComment,
                        _ => State::BlockComment,
                    };
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds the bytes of `input` through the comment-excluding adaptor and collects the
    /// surviving bytes back into a `String`.
    fn exclude_comments(input: &str) -> String {
        input.bytes().exclude_comments().map(char::from).collect()
    }

    #[test]
    fn empty() {
        assert!(exclude_comments("").is_empty());
    }

    #[test]
    fn single_char() {
        assert_eq!(exclude_comments("0"), "0");
    }

    #[test]
    fn two_chars() {
        assert_eq!(exclude_comments("ab"), "ab");
    }

    #[test]
    fn comment() {
        assert_eq!(exclude_comments("ab//cd"), "ab");
    }

    #[test]
    fn comments_are_ended_by_new_line() {
        assert_eq!(exclude_comments("ab//comment\nde"), "ab\nde");
    }

    #[test]
    fn new_lines_without_comments() {
        assert_eq!(exclude_comments("ab\nde"), "ab\nde");
    }

    #[test]
    fn single_slash_is_not_excluded() {
        assert_eq!(exclude_comments("ab/cd"), "ab/cd");
    }

    // The multi-line cases below spell every line with an explicit `\n` so that the trailing
    // spaces left in front of a removed comment stay visible and cannot be stripped by tooling.

    #[test]
    fn line_comments_on_multiple_lines() {
        let input = concat!(
            "\n",
            "line 1 //comment 1\n",
            "line 2 // comment 2 // comment 3\n",
            "line 3\n",
            "line 4 // comment 4",
        );
        let expected = concat!(
            "\n",
            "line 1 \n",
            "line 2 \n",
            "line 3\n",
            "line 4 ",
        );
        assert_eq!(exclude_comments(input), expected);
    }

    #[test]
    fn block_comment() {
        assert_eq!(exclude_comments("ab/*comment*/12"), "ab12");
    }

    #[test]
    fn empty_block_comment() {
        assert_eq!(exclude_comments("ab/**/12"), "ab12");
    }

    #[test]
    fn block_comment_with_asterisk_and_slash_inside() {
        assert_eq!(exclude_comments("ab/*false * asterisk and / */12"), "ab12");
    }

    #[test]
    fn block_comment_within_line_comment() {
        assert_eq!(exclude_comments("ab// /*comment*/12"), "ab");
    }

    #[test]
    #[should_panic(expected = "block comment not terminated with */")]
    fn block_comment_not_terminated() {
        exclude_comments("ab /*comment");
    }

    #[test]
    #[should_panic(expected = "block comment not terminated with */")]
    fn block_comment_not_completely_terminated() {
        exclude_comments("ab /*comment*");
    }

    #[test]
    fn block_and_line_comments_on_multiple_lines() {
        let input = concat!(
            "\n",
            "line 1 /* comment 1 */\n",
            "line /* comment 2 */2 // line comment 1\n",
            "line 3 /* some comments\n",
            "over multiple lines\n",
            "*/\n",
            "line 4 /* more multiline comments\n",
            "* with leading\n",
            "* asterisks\n",
            "*/end// line comment 2",
        );
        let expected = concat!(
            "\n",
            "line 1 \n",
            "line 2 \n",
            "line 3 \n",
            "line 4 end",
        );
        assert_eq!(exclude_comments(input), expected);
    }
}
27 changes: 17 additions & 10 deletions hex-literal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@
//! 08090a0b 0c0d0e0f
//! ");
//! assert_eq!(bytes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
//! assert_eq!(hex!("0a0B // 0c0d line comments"), [10, 11]);
//! assert_eq!(hex!("0a0B // line comments
//! 0c0d"), [10, 11, 12, 13]);
//! assert_eq!(hex!("0a0B /* block comments */ 0c0d"), [10, 11, 12, 13]);
//! assert_eq!(hex!("0a0B /* multi-line
//! block comments
//! */ 0c0d"), [10, 11, 12, 13]);
//! # }
//! ```
#![doc(
Expand All @@ -31,10 +38,14 @@
html_root_url = "https://docs.rs/hex-literal/0.3.1"
)]

mod comments;
extern crate proc_macro;

use std::{iter::FromIterator, vec::IntoIter};

use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree};
use std::iter::FromIterator;

use crate::comments::{Exclude, ExcludingComments};

/// Strips any outer `Delimiter::None` groups from the input,
/// returning a `TokenStream` consisting of the innermost
Expand All @@ -56,8 +67,7 @@ fn ignore_groups(mut input: TokenStream) -> TokenStream {
}

struct TokenTreeIter {
buf: Vec<u8>,
pos: usize,
buf: ExcludingComments<IntoIter<u8>>,
is_punct: bool,
}

Expand All @@ -75,20 +85,17 @@ impl TokenTreeIter {
_ => panic!("expected single string literal"),
};
buf.pop();
let mut iter = buf.into_iter().exclude_comments();
iter.next();
Self {
buf,
pos: 1,
buf: iter,
is_punct: false,
}
}

fn next_hex_val(&mut self) -> Option<u8> {
loop {
let v = match self.buf.get(self.pos) {
Some(&v) => v,
None => return None,
};
self.pos += 1;
let v = self.buf.next()?;
let n = match v {
b'0'..=b'9' => v - 48,
b'A'..=b'F' => v - 55,
Expand Down

0 comments on commit aee72b8

Please sign in to comment.