Deprecate the bytes!() macro.

Replace its usage with byte string literals, except in `bytes!()` tests. Also add a new snapshot, to be able to use the new b"foo" syntax. The src/etc/2014-06-rewrite-bytes-macros.py script automatically rewrites `bytes!()` invocations into byte string literals. Pass it filenames as arguments to generate a diff that you can inspect, or `--apply` followed by filenames to apply the changes in place. Diffs can be piped into `tip` or `pygmentize -l diff` for coloring.
rust-lang · Jun 19, 2014 · 108b8b6 · 108b8b6
1 parent abf7e93
commit 108b8b6
Show file tree

Hide file tree

Showing 42 changed files with 498 additions and 355 deletions.
diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs
@@ -1269,7 +1269,7 @@ fn make_out_name(config: &Config, testfile: &Path, extension: &str) -> Path {
 
 fn aux_output_dir_name(config: &Config, testfile: &Path) -> Path {
     let mut f = output_base_name(config, testfile);
-    match f.filename().map(|s| Vec::from_slice(s).append(bytes!(".libaux"))) {
+    match f.filename().map(|s| Vec::from_slice(s).append(b".libaux")) {
         Some(v) => f.set_filename(v),
         None => ()
     }
@@ -1490,7 +1490,7 @@ fn append_suffix_to_stem(p: &Path, suffix: &str) -> Path {
         (*p).clone()
     } else {
         let stem = p.filestem().unwrap();
-        p.with_filename(Vec::from_slice(stem).append(bytes!("-")).append(suffix.as_bytes()))
+        p.with_filename(Vec::from_slice(stem).append(b"-").append(suffix.as_bytes()))
     }
 }
 

diff --git a/src/doc/complement-cheatsheet.md b/src/doc/complement-cheatsheet.md
@@ -76,7 +76,7 @@ character.
 ~~~
 use std::str;
 
-let x = bytes!(72u8,"ello ",0xF0,0x90,0x80,"World!");
+let x = b"Hello \xF0\x90\x80World!";
 let y = str::from_utf8_lossy(x);
 ~~~
 

diff --git a/src/doc/rust.md b/src/doc/rust.md
@@ -378,6 +378,19 @@ the characters `U+0022` (double-quote) (except when followed by at least as
 many `U+0023` (`#`) characters as were used to start the raw string literal) or
 `U+005C` (`\`) do not have any special meaning.
 
+Examples for byte string literals:
+
+~~~~
+b"foo"; br"foo";                     // foo
+b"\"foo\""; br#""foo""#;             // "foo"
+
+b"foo #\"# bar";
+br##"foo #"# bar"##;                 // foo #"# bar
+
+b"\x52"; b"R"; br"R";                // R
+b"\\x52"; br"\x52";                  // \x52
+~~~~
+
 #### Number literals
 
 ~~~~ {.ebnf .gram}

diff --git a/src/etc/2014-06-rewrite-bytes-macros.py b/src/etc/2014-06-rewrite-bytes-macros.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+#
+# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import sys
+import subprocess
+import re
+
+
+def main():
+    if len(sys.argv) <= 1:
+        print('Usage: %s [ --apply ] filename1.rs filename2.rs ...'
+              % sys.argv[0])
+    elif sys.argv[1] == '--apply':
+        for filename in sys.argv[2:]:
+            patch(filename)
+    else:
+        for filename in sys.argv[1:]:
+            diff(filename)
+
+
+def patch(filename):
+    source = read(filename)
+    rewritten = rewrite_bytes_macros(source)
+    if rewritten is not None and rewritten != source:
+        write(filename, rewritten)
+
+
+def diff(filename):
+    rewritten = rewrite_bytes_macros(read(filename))
+    if rewritten is not None:
+        p = subprocess.Popen(['diff', '-u', filename, '-'],
+                             stdin=subprocess.PIPE)
+        p.stdin.write(rewritten)
+        p.stdin.close()
+        p.wait()
+
+
+def read(filename):
+    with open(filename, 'rb') as f:
+        return f.read()
+
+
+def write(filename, content):
+    with open(filename, 'wb') as f:
+        f.write(content)
+
+
+def rewrite_bytes_macros(source):
+    rewritten, num_occurrences = BYTES_MACRO_RE.subn(rewrite_one_macro, source)
+    if num_occurrences > 0:
+        return rewritten
+
+
+BYTES_MACRO_RE = re.compile(br'bytes!\(  (?P<args>  [^)]*  )  \)', re.VERBOSE)
+
+
+def rewrite_one_macro(match):
+    try:
+        bytes = parse_bytes(split_args(match.group('args')))
+        return b'b"' + b''.join(map(escape, bytes)) + b'"'
+    except SkipThisRewrite:
+        print('Skipped: %s' % match.group(0).decode('utf8', 'replace'))
+        return match.group(0)
+
+
+class SkipThisRewrite(Exception):
+    pass
+
+
+def split_args(args):
+    previous = b''
+    for arg in args.split(b','):
+        if previous:
+            arg = previous + b',' + arg
+        if arg.count(b'"') % 2 == 0:
+            yield arg
+            previous = b''
+        else:
+            previous = arg
+    if previous:
+        yield previous
+
+
+def parse_bytes(args):
+    for arg in args:
+        arg = arg.strip()
+        if (arg.startswith(b'"') and arg.endswith(b'"')) or (
+                arg.startswith(b"'") and arg.endswith(b"'")):
+            # Escaped newline means something different in Rust and Python.
+            if b'\\\n' in arg:
+                raise SkipThisRewrite
+            for byte in eval(b'u' + arg).encode('utf8'):
+                yield ord(byte)
+        else:
+            if arg.endswith(b'u8'):
+                arg = arg[:-2]
+            # Assume that all Rust integer literals
+            # are valid Python integer literals
+            value = int(eval(arg))
+            assert value <= 0xFF
+            yield value
+
+
+def escape(byte):  # Render one byte value (0-255) as source text valid inside a Rust b"..." literal.
+    c = chr(byte)
+    escaped = {
+        b'\0': br'\0',
+        b'\t': br'\t',
+        b'\n': br'\n',
+        b'\r': br'\r',
+        b'"': b'\\"',  # a raw double quote would end the b"..." literal; a single quote needs no escape
+        b'\\': br'\\',
+    }.get(c)
+    if escaped is not None:
+        return escaped
+    elif b' ' <= c <= b'~':  # remaining printable ASCII is emitted verbatim
+        return c
+    else:  # everything else (control bytes, >= 0x7F) becomes a \xNN escape
+        return ('\\x%02X' % byte).encode('ascii')
+
+
+if str is not bytes:
+    # Python 3.x
+    ord = lambda x: x
+    chr = lambda x: bytes([x])
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/libcollections/slice.rs b/src/libcollections/slice.rs
@@ -1957,30 +1957,30 @@ mod tests {
 
     #[test]
     fn test_starts_with() {
-        assert!(bytes!("foobar").starts_with(bytes!("foo")));
-        assert!(!bytes!("foobar").starts_with(bytes!("oob")));
-        assert!(!bytes!("foobar").starts_with(bytes!("bar")));
-        assert!(!bytes!("foo").starts_with(bytes!("foobar")));
-        assert!(!bytes!("bar").starts_with(bytes!("foobar")));
-        assert!(bytes!("foobar").starts_with(bytes!("foobar")));
+        assert!(b"foobar".starts_with(b"foo"));
+        assert!(!b"foobar".starts_with(b"oob"));
+        assert!(!b"foobar".starts_with(b"bar"));
+        assert!(!b"foo".starts_with(b"foobar"));
+        assert!(!b"bar".starts_with(b"foobar"));
+        assert!(b"foobar".starts_with(b"foobar"));
         let empty: &[u8] = [];
         assert!(empty.starts_with(empty));
-        assert!(!empty.starts_with(bytes!("foo")));
-        assert!(bytes!("foobar").starts_with(empty));
+        assert!(!empty.starts_with(b"foo"));
+        assert!(b"foobar".starts_with(empty));
     }
 
     #[test]
     fn test_ends_with() {
-        assert!(bytes!("foobar").ends_with(bytes!("bar")));
-        assert!(!bytes!("foobar").ends_with(bytes!("oba")));
-        assert!(!bytes!("foobar").ends_with(bytes!("foo")));
-        assert!(!bytes!("foo").ends_with(bytes!("foobar")));
-        assert!(!bytes!("bar").ends_with(bytes!("foobar")));
-        assert!(bytes!("foobar").ends_with(bytes!("foobar")));
+        assert!(b"foobar".ends_with(b"bar"));
+        assert!(!b"foobar".ends_with(b"oba"));
+        assert!(!b"foobar".ends_with(b"foo"));
+        assert!(!b"foo".ends_with(b"foobar"));
+        assert!(!b"bar".ends_with(b"foobar"));
+        assert!(b"foobar".ends_with(b"foobar"));
         let empty: &[u8] = [];
         assert!(empty.ends_with(empty));
-        assert!(!empty.ends_with(bytes!("foo")));
-        assert!(bytes!("foobar").ends_with(empty));
+        assert!(!empty.ends_with(b"foo"));
+        assert!(b"foobar".ends_with(empty));
     }
 
     #[test]

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
@@ -382,7 +382,7 @@ static TAG_CONT_U8: u8 = 128u8;
 /// # Example
 ///
 /// ```rust
-/// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
+/// let input = b"Hello \xF0\x90\x80World";
 /// let output = std::str::from_utf8_lossy(input);
 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
 /// ```
@@ -391,7 +391,7 @@ pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
         return Slice(unsafe { mem::transmute(v) })
     }
 
-    static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
+    static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
     let mut i = 0;
     let total = v.len();
     fn unsafe_get(xs: &[u8], i: uint) -> u8 {
@@ -994,7 +994,7 @@ mod tests {
     fn test_into_bytes() {
         let data = "asdf".to_string();
         let buf = data.into_bytes();
-        assert_eq!(bytes!("asdf"), buf.as_slice());
+        assert_eq!(b"asdf", buf.as_slice());
     }
 
     #[test]
@@ -2050,58 +2050,58 @@ mod tests {
 
     #[test]
     fn test_str_from_utf8() {
-        let xs = bytes!("hello");
+        let xs = b"hello";
         assert_eq!(from_utf8(xs), Some("hello"));
 
-        let xs = bytes!("ศไทย中华Việt Nam");
+        let xs = "ศไทย中华Việt Nam".as_bytes();
         assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
 
-        let xs = bytes!("hello", 0xff);
+        let xs = b"hello\xFF";
         assert_eq!(from_utf8(xs), None);
     }
 
     #[test]
     fn test_str_from_utf8_owned() {
-        let xs = Vec::from_slice(bytes!("hello"));
+        let xs = Vec::from_slice(b"hello");
         assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));
 
-        let xs = Vec::from_slice(bytes!("ศไทย中华Việt Nam"));
+        let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
         assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));
 
-        let xs = Vec::from_slice(bytes!("hello", 0xff));
+        let xs = Vec::from_slice(b"hello\xFF");
         assert_eq!(from_utf8_owned(xs),
-                   Err(Vec::from_slice(bytes!("hello", 0xff))));
+                   Err(Vec::from_slice(b"hello\xFF")));
     }
 
     #[test]
     fn test_str_from_utf8_lossy() {
-        let xs = bytes!("hello");
+        let xs = b"hello";
         assert_eq!(from_utf8_lossy(xs), Slice("hello"));
 
-        let xs = bytes!("ศไทย中华Việt Nam");
+        let xs = "ศไทย中华Việt Nam".as_bytes();
         assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
 
-        let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
+        let xs = b"Hello\xC2 There\xFF Goodbye";
         assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));
 
-        let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
+        let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
         assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));
 
-        let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
+        let xs = b"\xF5foo\xF5\x80bar";
         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));
 
-        let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
+        let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));
 
-        let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
+        let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));
 
-        let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
+        let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
                                                foo\U00010000bar".to_string()));
 
         // surrogates
-        let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
+        let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
                                                \uFFFD\uFFFD\uFFFDbar".to_string()));
     }
@@ -2298,8 +2298,8 @@ mod bench {
     #[bench]
     fn is_utf8_100_ascii(b: &mut Bencher) {
 
-        let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
-                        Lorem ipsum dolor sit amet, consectetur. ");
+        let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
+                  Lorem ipsum dolor sit amet, consectetur. ";
 
         assert_eq!(100, s.len());
         b.iter(|| {
@@ -2309,7 +2309,7 @@ mod bench {
 
     #[bench]
     fn is_utf8_100_multibyte(b: &mut Bencher) {
-        let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
+        let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
         assert_eq!(100, s.len());
         b.iter(|| {
             is_utf8(s)
@@ -2318,8 +2318,8 @@ mod bench {
 
     #[bench]
     fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
-        let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
-                        Lorem ipsum dolor sit amet, consectetur. ");
+        let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
+                  Lorem ipsum dolor sit amet, consectetur. ";
 
         assert_eq!(100, s.len());
         b.iter(|| {
@@ -2329,7 +2329,7 @@ mod bench {
 
     #[bench]
     fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
-        let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
+        let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
         assert_eq!(100, s.len());
         b.iter(|| {
             let _ = from_utf8_lossy(s);
@@ -2338,7 +2338,7 @@ mod bench {
 
     #[bench]
     fn from_utf8_lossy_invalid(b: &mut Bencher) {
-        let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
+        let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
         b.iter(|| {
             let _ = from_utf8_lossy(s);
         });