diff --git a/hex-literal/src/comments.rs b/hex-literal/src/comments.rs new file mode 100644 index 00000000..02dd6993 --- /dev/null +++ b/hex-literal/src/comments.rs @@ -0,0 +1,272 @@ +//! Provides an Iterator decorator that uses a finite state machine to exclude comments +//! from a string in linear time and constant space. + +use std::iter::Peekable; + +pub(crate) trait Exclude: Sized + Iterator { + fn exclude_comments(self) -> ExcludingComments; +} + +impl> Exclude for T { + fn exclude_comments(self) -> ExcludingComments { + ExcludingComments::new_from_iter(self) + } +} + +pub(crate) struct ExcludingComments> { + state: State, + iter: Peekable, +} + +impl> Iterator for ExcludingComments { + type Item = u8; + + fn next(&mut self) -> Option { + let next_byte = self.next_byte(); + if next_byte.is_none() { + match self.state { + State::BlockComment | State::PotentiallyLeavingBlockComment => { + panic!("block comment not terminated with */") + } + _ => {} + } + } + next_byte + } +} + +/// States of the comment removal machine: +///
+///           Normal
+///            '/'                   
+///      PotentialComment
+///     '/'            '*'
+/// LineComment     BlockComment
+///    '\n'            '*'
+///   Normal      PotentiallyLeavingBlockComment
+///                    '/'           '_'
+///                   Normal     BlockComment
+/// 
+enum State { + Normal, + PotentialComment { previous: u8 }, + LineComment, + BlockComment, + PotentiallyLeavingBlockComment, +} + +impl> ExcludingComments { + fn new_from_iter(iter: I) -> Self { + Self { + state: State::Normal, + iter: iter.peekable(), + } + } + + fn next_byte(&mut self) -> Option { + loop { + return match self.state { + State::Normal => { + let next = self.iter.next()?; + match next { + b'/' => { + self.state = State::PotentialComment { previous: next }; + continue; + } + _ => Some(next), + } + } + State::PotentialComment { previous } => { + let peeked_next = self.iter.peek()?; + match peeked_next { + b'/' => { + // second /, enter line comment and consume + self.iter.next(); + self.state = State::LineComment; + continue; + } + b'*' => { + /* entering a block comment consume '*' */ + self.iter.next(); + self.state = State::BlockComment; + continue; + } + _ => { + // here we need to emit the previous character (the first '/') + // and do not consume the current character + self.state = State::Normal; + return Some(previous); + } + } + } + State::LineComment => { + let next = self.iter.next()?; + match next { + b'\n' => { + self.state = State::Normal; + return Some(next); + } + _ => { + // ignore all other characters while in the line comment + continue; + } + } + } + State::BlockComment => { + let next = self.iter.next()?; + match next { + b'*' => { + self.state = State::PotentiallyLeavingBlockComment; + continue; + } + _ => { + /* ignore all other characters while in the block comment */ + continue; + } + } + } + State::PotentiallyLeavingBlockComment => { + let next = self.iter.next()?; + match next { + b'/' => { + /* Left the block comment */ + self.state = State::Normal; + continue; + } + _ => { + /* we're still in the block comment */ + self.state = State::BlockComment; + continue; + } + } + } + }; + } + } +} + +#[cfg(test)] +mod tests { + use std::vec::IntoIter; + + use super::*; + + /// Converts the input to an iterator of u8, excludes comments, maps back to char and collects + /// the results. + fn exclude_comments(input: &str) -> String { + let excluding_comments: ExcludingComments> = input + .to_string() + .into_bytes() + .into_iter() + .exclude_comments(); + excluding_comments.map(|b| b as char).collect() + } + + #[test] + fn empty() { + assert!(exclude_comments("").is_empty()); + } + + #[test] + fn single_char() { + assert_eq!(exclude_comments("0"), "0"); + } + + #[test] + fn two_chars() { + assert_eq!(exclude_comments("ab"), "ab"); + } + + #[test] + fn comment() { + assert_eq!(exclude_comments("ab//cd"), "ab"); + } + + #[test] + fn comments_are_ended_by_new_line() { + assert_eq!(exclude_comments("ab//comment\nde"), "ab\nde"); + } + + #[test] + fn new_lines_without_comments() { + assert_eq!(exclude_comments("ab\nde"), "ab\nde"); + } + + #[test] + fn single_slash_is_not_excluded() { + assert_eq!(exclude_comments("ab/cd"), "ab/cd"); + } + + #[test] + fn line_comments_on_multiple_lines() { + assert_eq!( + exclude_comments( + " +line 1 //comment 1 +line 2 // comment 2 // comment 3 +line 3 +line 4 // comment 4" + ), + " +line 1 +line 2 +line 3 +line 4 " + ); + } + + #[test] + fn block_comment() { + assert_eq!(exclude_comments("ab/*comment*/12"), "ab12"); + } + + #[test] + fn empty_block_comment() { + assert_eq!(exclude_comments("ab/**/12"), "ab12"); + } + + #[test] + fn block_comment_with_asterisk_and_slash_inside() { + assert_eq!(exclude_comments("ab/*false * asterisk and / */12"), "ab12"); + } + + #[test] + fn block_comment_within_line_comment() { + assert_eq!(exclude_comments("ab// /*comment*/12"), "ab"); + } + + #[test] + #[should_panic(expected = "block comment not terminated with */")] + fn block_comment_not_terminated() { + exclude_comments("ab /*comment"); + } + + #[test] + #[should_panic(expected = "block comment not terminated with */")] + fn block_comment_not_completely_terminated() { + exclude_comments("ab /*comment*"); + } + + #[test] + fn block_and_line_comments_on_multiple_lines() { + assert_eq!( + exclude_comments( + " +line 1 /* comment 1 */ +line /* comment 2 */2 // line comment 1 +line 3 /* some comments +over multiple lines +*/ +line 4 /* more multiline comments +* with leading +* asterisks +*/end// line comment 2" + ), + " +line 1 +line 2 +line 3 +line 4 end" + ); + } +} diff --git a/hex-literal/src/lib.rs b/hex-literal/src/lib.rs index 31611619..3d6b840d 100644 --- a/hex-literal/src/lib.rs +++ b/hex-literal/src/lib.rs @@ -23,6 +23,13 @@ //! 08090a0b 0c0d0e0f //! "); //! assert_eq!(bytes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); +//! assert_eq!(hex!("0a0B // 0c0d line comments"), [10, 11]); +//! assert_eq!(hex!("0a0B // line comments +//! 0c0d"), [10, 11, 12, 13]); +//! assert_eq!(hex!("0a0B /* block comments */ 0c0d"), [10, 11, 12, 13]); +//! assert_eq!(hex!("0a0B /* multi-line +//! block comments +//! */ 0c0d"), [10, 11, 12, 13]); //! # } //! ``` #![doc( @@ -31,10 +38,14 @@ html_root_url = "https://docs.rs/hex-literal/0.3.1" )] +mod comments; extern crate proc_macro; +use std::{iter::FromIterator, vec::IntoIter}; + use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree}; -use std::iter::FromIterator; + +use crate::comments::{Exclude, ExcludingComments}; /// Strips any outer `Delimiter::None` groups from the input, /// returning a `TokenStream` consisting of the innermost @@ -56,8 +67,7 @@ fn ignore_groups(mut input: TokenStream) -> TokenStream { } struct TokenTreeIter { - buf: Vec, - pos: usize, + buf: ExcludingComments>, is_punct: bool, } @@ -75,20 +85,17 @@ impl TokenTreeIter { _ => panic!("expected single string literal"), }; buf.pop(); + let mut iter = buf.into_iter().exclude_comments(); + iter.next(); Self { - buf, - pos: 1, + buf: iter, is_punct: false, } } fn next_hex_val(&mut self) -> Option { loop { - let v = match self.buf.get(self.pos) { - Some(&v) => v, - None => return None, - }; - self.pos += 1; + let v = self.buf.next()?; let n = match v { b'0'..=b'9' => v - 48, b'A'..=b'F' => v - 55,