Allow line and block comments. (#512)

RustCrypto · Jul 2, 2021 · aee72b8 · aee72b8
1 parent fb107e6
commit aee72b8
Show file tree

Hide file tree

Showing 2 changed files with 289 additions and 10 deletions.
diff --git a/hex-literal/src/comments.rs b/hex-literal/src/comments.rs
@@ -0,0 +1,272 @@
+//! Provides an `Iterator<Item = u8>` decorator that uses a finite state machine to exclude
+//! comments from a string in linear time and constant space.
+
+use std::iter::Peekable;
+
+/// Extension trait that adds comment stripping to iterators over `u8`.
+pub(crate) trait Exclude: Sized + Iterator<Item = u8> {
+    /// Consumes the iterator and wraps it in an `ExcludingComments` adapter.
+    fn exclude_comments(self) -> ExcludingComments<Self>;
+}
+
+/// Blanket implementation: every iterator yielding `u8` gets `exclude_comments`.
+impl<T> Exclude for T
+where
+    T: Iterator<Item = u8>,
+{
+    /// Hands the iterator over to `ExcludingComments::new_from_iter`.
+    fn exclude_comments(self) -> ExcludingComments<Self> {
+        ExcludingComments::new_from_iter(self)
+    }
+}
+
+/// Iterator adapter that yields the bytes of the wrapped iterator with `//` line comments and
+/// `/* */` block comments removed.
+///
+/// The newline that terminates a line comment is not part of the comment and is still yielded.
+pub(crate) struct ExcludingComments<I: Iterator<Item = u8>> {
+    /// Current position of the comment removal state machine.
+    state: State,
+    /// Underlying byte source; peekable so the byte following a `/` can be inspected without
+    /// consuming it.
+    iter: Peekable<I>,
+}
+
+impl<I: Iterator<Item = u8>> Iterator for ExcludingComments<I> {
+    type Item = u8;
+
+    /// Returns the next byte that lies outside of any comment, or `None` at the end of input.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the input ends inside a block comment that was never closed with `*/`.
+    fn next(&mut self) -> Option<Self::Item> {
+        let next_byte = self.next_byte();
+        if next_byte.is_none() {
+            // `next_byte` leaves the state untouched when the input runs out, so the state
+            // tells whether the input ended in the middle of a block comment.
+            match self.state {
+                State::BlockComment | State::PotentiallyLeavingBlockComment => {
+                    panic!("block comment not terminated with */")
+                }
+                _ => {}
+            }
+        }
+        next_byte
+    }
+}
+
+/// States of the comment removal machine. Each transition is labelled with the byte that
+/// triggers it; `'_'` stands for any other byte:
+/// <pre>
+///           Normal
+///            '/'
+///      PotentialComment
+///     '/'            '*'
+/// LineComment     BlockComment
+///    '\n'            '*'
+///   Normal      PotentiallyLeavingBlockComment
+///               '/'              '*'                        '_'
+///             Normal   PotentiallyLeavingBlockComment   BlockComment
+/// </pre>
+/// From `PotentialComment`, any other byte (or the end of the input) leads back to `Normal`
+/// and the held-back `/` is emitted.
+enum State {
+    /// Outside any comment; bytes are passed through unchanged.
+    Normal,
+    /// A `/` was read and is held back in `previous` until the following byte shows whether
+    /// it opens a comment.
+    PotentialComment { previous: u8 },
+    /// Inside a `//` comment; bytes are dropped until the next `\n`, which is emitted.
+    LineComment,
+    /// Inside a `/* */` comment; bytes are dropped.
+    BlockComment,
+    /// Inside a block comment, immediately after a `*`; a following `/` closes the comment.
+    PotentiallyLeavingBlockComment,
+}
+
+impl<I: Iterator<Item = u8>> ExcludingComments<I> {
+    /// Wraps `iter` in a comment-excluding adapter that starts in `State::Normal`.
+    fn new_from_iter(iter: I) -> Self {
+        Self {
+            state: State::Normal,
+            iter: iter.peekable(),
+        }
+    }
+
+    /// Advances the state machine until it can emit a byte that lies outside any comment.
+    ///
+    /// Returns `None` once the underlying iterator is exhausted. When that happens inside a
+    /// comment the state is left as it is, so the caller can detect an unterminated block
+    /// comment.
+    fn next_byte(&mut self) -> Option<u8> {
+        loop {
+            return match self.state {
+                State::Normal => {
+                    let next = self.iter.next()?;
+                    match next {
+                        b'/' => {
+                            self.state = State::PotentialComment { previous: next };
+                            continue;
+                        }
+                        _ => Some(next),
+                    }
+                }
+                State::PotentialComment { previous } => {
+                    // Match on the peeked `Option` directly instead of using `?`: if the input
+                    // ends right after a lone '/', that '/' is not part of a comment and must
+                    // still be emitted rather than silently dropped.
+                    match self.iter.peek() {
+                        Some(b'/') => {
+                            // second /, enter line comment and consume
+                            self.iter.next();
+                            self.state = State::LineComment;
+                            continue;
+                        }
+                        Some(b'*') => {
+                            /* entering a block comment consume '*' */
+                            self.iter.next();
+                            self.state = State::BlockComment;
+                            continue;
+                        }
+                        _ => {
+                            // here we need to emit the previous character (the first '/')
+                            // and do not consume the current character (if there is one)
+                            self.state = State::Normal;
+                            return Some(previous);
+                        }
+                    }
+                }
+                State::LineComment => {
+                    let next = self.iter.next()?;
+                    match next {
+                        b'\n' => {
+                            self.state = State::Normal;
+                            return Some(next);
+                        }
+                        _ => {
+                            // ignore all other characters while in the line comment
+                            continue;
+                        }
+                    }
+                }
+                State::BlockComment => {
+                    let next = self.iter.next()?;
+                    match next {
+                        b'*' => {
+                            self.state = State::PotentiallyLeavingBlockComment;
+                            continue;
+                        }
+                        _ => {
+                            /* ignore all other characters while in the block comment */
+                            continue;
+                        }
+                    }
+                }
+                State::PotentiallyLeavingBlockComment => {
+                    let next = self.iter.next()?;
+                    match next {
+                        b'/' => {
+                            /* Left the block comment */
+                            self.state = State::Normal;
+                            continue;
+                        }
+                        b'*' => {
+                            /* a run of asterisks: this '*' may be the one right before the
+                             * closing '/', so keep waiting for it (handles `**/`) */
+                            continue;
+                        }
+                        _ => {
+                            /* we're still in the block comment */
+                            self.state = State::BlockComment;
+                            continue;
+                        }
+                    }
+                }
+            };
+        }
+    }
+}
+
+// Unit tests for the comment removal state machine, driven through `Exclude::exclude_comments`.
+#[cfg(test)]
+mod tests {
+    use std::vec::IntoIter;
+
+    use super::*;
+
+    /// Converts the input to an iterator of u8, excludes comments, maps back to char and collects
+    /// the results.
+    fn exclude_comments(input: &str) -> String {
+        let excluding_comments: ExcludingComments<IntoIter<u8>> = input
+            .to_string()
+            .into_bytes()
+            .into_iter()
+            .exclude_comments();
+        excluding_comments.map(|b| b as char).collect()
+    }
+
+    #[test]
+    fn empty() {
+        assert!(exclude_comments("").is_empty());
+    }
+
+    #[test]
+    fn single_char() {
+        assert_eq!(exclude_comments("0"), "0");
+    }
+
+    #[test]
+    fn two_chars() {
+        assert_eq!(exclude_comments("ab"), "ab");
+    }
+
+    // A line comment that runs to the end of the input is dropped entirely.
+    #[test]
+    fn comment() {
+        assert_eq!(exclude_comments("ab//cd"), "ab");
+    }
+
+    // The '\n' that ends a line comment is kept in the output.
+    #[test]
+    fn comments_are_ended_by_new_line() {
+        assert_eq!(exclude_comments("ab//comment\nde"), "ab\nde");
+    }
+
+    #[test]
+    fn new_lines_without_comments() {
+        assert_eq!(exclude_comments("ab\nde"), "ab\nde");
+    }
+
+    // A '/' that is not followed by '/' or '*' is ordinary data.
+    #[test]
+    fn single_slash_is_not_excluded() {
+        assert_eq!(exclude_comments("ab/cd"), "ab/cd");
+    }
+
+    // Whitespace in front of a comment is not part of the comment, so the expected lines keep
+    // their trailing space.
+    #[test]
+    fn line_comments_on_multiple_lines() {
+        assert_eq!(
+            exclude_comments(
+                "
+line 1 //comment 1
+line 2 // comment 2 // comment 3
+line 3
+line 4 // comment 4"
+            ),
+            "
+line 1 
+line 2 
+line 3
+line 4 "
+        );
+    }
+
+    #[test]
+    fn block_comment() {
+        assert_eq!(exclude_comments("ab/*comment*/12"), "ab12");
+    }
+
+    #[test]
+    fn empty_block_comment() {
+        assert_eq!(exclude_comments("ab/**/12"), "ab12");
+    }
+
+    // A lone '*' or '/' inside a block comment does not close it; only "*/" does.
+    #[test]
+    fn block_comment_with_asterisk_and_slash_inside() {
+        assert_eq!(exclude_comments("ab/*false * asterisk and / */12"), "ab12");
+    }
+
+    // Inside a line comment "/*" has no special meaning.
+    #[test]
+    fn block_comment_within_line_comment() {
+        assert_eq!(exclude_comments("ab// /*comment*/12"), "ab");
+    }
+
+    #[test]
+    #[should_panic(expected = "block comment not terminated with */")]
+    fn block_comment_not_terminated() {
+        exclude_comments("ab /*comment");
+    }
+
+    // The input ends after the '*' of the terminator, before the closing '/'.
+    #[test]
+    #[should_panic(expected = "block comment not terminated with */")]
+    fn block_comment_not_completely_terminated() {
+        exclude_comments("ab /*comment*");
+    }
+
+    // Newlines inside a block comment are removed together with the comment.
+    #[test]
+    fn block_and_line_comments_on_multiple_lines() {
+        assert_eq!(
+            exclude_comments(
+                "
+line 1 /* comment 1 */
+line /* comment 2 */2 // line comment 1
+line 3 /* some comments
+over multiple lines
+*/
+line 4 /* more multiline comments
+* with leading
+* asterisks
+*/end// line comment 2"
+            ),
+            "
+line 1 
+line 2 
+line 3 
+line 4 end"
+        );
+    }
+}
diff --git a/hex-literal/src/lib.rs b/hex-literal/src/lib.rs
@@ -23,6 +23,13 @@
 //!     08090a0b 0c0d0e0f
 //! ");
 //! assert_eq!(bytes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
+//! assert_eq!(hex!("0a0B // 0c0d line comments"), [10, 11]);
+//! assert_eq!(hex!("0a0B // line comments
+//!                  0c0d"), [10, 11, 12, 13]);
+//! assert_eq!(hex!("0a0B /* block comments */ 0c0d"), [10, 11, 12, 13]);
+//! assert_eq!(hex!("0a0B /* multi-line
+//!                          block comments
+//!                       */ 0c0d"), [10, 11, 12, 13]);
 //! # }
 //! ```
 #![doc(
@@ -31,10 +38,14 @@
     html_root_url = "https://docs.rs/hex-literal/0.3.1"
 )]
 
+mod comments;
 extern crate proc_macro;
 
+use std::{iter::FromIterator, vec::IntoIter};
+
 use proc_macro::{Delimiter, Group, Literal, Punct, Spacing, TokenStream, TokenTree};
-use std::iter::FromIterator;
+
+use crate::comments::{Exclude, ExcludingComments};
 
 /// Strips any outer `Delimiter::None` groups from the input,
 /// returning a `TokenStream` consisting of the innermost
@@ -56,8 +67,7 @@ fn ignore_groups(mut input: TokenStream) -> TokenStream {
 }
 
 struct TokenTreeIter {
-    buf: Vec<u8>,
-    pos: usize,
+    buf: ExcludingComments<IntoIter<u8>>,
     is_punct: bool,
 }
 
@@ -75,20 +85,17 @@ impl TokenTreeIter {
             _ => panic!("expected single string literal"),
         };
         buf.pop();
+        let mut iter = buf.into_iter().exclude_comments();
+        iter.next();
         Self {
-            buf,
-            pos: 1,
+            buf: iter,
             is_punct: false,
         }
     }
 
     fn next_hex_val(&mut self) -> Option<u8> {
         loop {
-            let v = match self.buf.get(self.pos) {
-                Some(&v) => v,
-                None => return None,
-            };
-            self.pos += 1;
+            let v = self.buf.next()?;
             let n = match v {
                 b'0'..=b'9' => v - 48,
                 b'A'..=b'F' => v - 55,