Skip to content

Commit

Permalink
Deprecate the bytes!() macro.
Browse files Browse the repository at this point in the history
Replace its usage with byte string literals, except in `bytes!()` tests.
Also add a new snapshot, to be able to use the new b"foo" syntax.

The src/etc/2014-06-rewrite-bytes-macros.py script automatically
rewrites `bytes!()` invocations into byte string literals.
Pass it filenames as arguments to generate a diff that you can inspect,
or `--apply` followed by filenames to apply the changes in place.
Diffs can be piped into `tip` or `pygmentize -l diff` for coloring.
  • Loading branch information
SimonSapin authored and alexcrichton committed Jun 19, 2014
1 parent abf7e93 commit 108b8b6
Show file tree
Hide file tree
Showing 42 changed files with 498 additions and 355 deletions.
4 changes: 2 additions & 2 deletions src/compiletest/runtest.rs
Expand Up @@ -1269,7 +1269,7 @@ fn make_out_name(config: &Config, testfile: &Path, extension: &str) -> Path {

fn aux_output_dir_name(config: &Config, testfile: &Path) -> Path {
let mut f = output_base_name(config, testfile);
match f.filename().map(|s| Vec::from_slice(s).append(bytes!(".libaux"))) {
match f.filename().map(|s| Vec::from_slice(s).append(b".libaux")) {
Some(v) => f.set_filename(v),
None => ()
}
Expand Down Expand Up @@ -1490,7 +1490,7 @@ fn append_suffix_to_stem(p: &Path, suffix: &str) -> Path {
(*p).clone()
} else {
let stem = p.filestem().unwrap();
p.with_filename(Vec::from_slice(stem).append(bytes!("-")).append(suffix.as_bytes()))
p.with_filename(Vec::from_slice(stem).append(b"-").append(suffix.as_bytes()))
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/doc/complement-cheatsheet.md
Expand Up @@ -76,7 +76,7 @@ character.
~~~
use std::str;
let x = bytes!(72u8,"ello ",0xF0,0x90,0x80,"World!");
let x = b"Hello \xF0\x90\x80World!";
let y = str::from_utf8_lossy(x);
~~~

Expand Down
13 changes: 13 additions & 0 deletions src/doc/rust.md
Expand Up @@ -378,6 +378,19 @@ the characters `U+0022` (double-quote) (except when followed by at least as
many `U+0023` (`#`) characters as were used to start the raw string literal) or
`U+005C` (`\`) do not have any special meaning.

Examples for byte string literals:

~~~~
b"foo"; br"foo"; // foo
b"\"foo\""; br#""foo""#; // "foo"
b"foo #\"# bar";
br##"foo #"# bar"##; // foo #"# bar
b"\x52"; b"R"; br"R"; // R
b"\\x52"; br"\x52"; // \x52
~~~~

#### Number literals

~~~~ {.ebnf .gram}
Expand Down
138 changes: 138 additions & 0 deletions src/etc/2014-06-rewrite-bytes-macros.py
@@ -0,0 +1,138 @@
#!/usr/bin/env python
#
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

import sys
import subprocess
import re


def main():
    """Command-line entry point.

    With no arguments, print a usage line.  With ``--apply`` as the first
    argument, rewrite each following file in place; otherwise show a diff
    of the proposed rewrite for every file named.
    """
    arguments = sys.argv[1:]
    if not arguments:
        print('Usage: %s [ --apply ] filename1.rs filename2.rs ...'
              % sys.argv[0])
        return
    if arguments[0] == '--apply':
        action, filenames = patch, arguments[1:]
    else:
        action, filenames = diff, arguments
    for filename in filenames:
        action(filename)


def patch(filename):
    """Rewrite ``filename`` in place when the rewrite changes its content."""
    original = read(filename)
    updated = rewrite_bytes_macros(original)
    if updated is None or updated == original:
        return
    write(filename, updated)


def diff(filename):
    """Show the proposed rewrite of ``filename`` as a unified diff.

    The rewritten content is piped to ``diff -u filename -``.  Nothing is
    run when the rewrite step returns None.
    """
    rewritten = rewrite_bytes_macros(read(filename))
    if rewritten is None:
        return
    child = subprocess.Popen(['diff', '-u', filename, '-'],
                             stdin=subprocess.PIPE)
    child.stdin.write(rewritten)
    child.stdin.close()
    child.wait()


def read(filename):
    """Return the whole content of ``filename`` as bytes."""
    handle = open(filename, 'rb')
    try:
        return handle.read()
    finally:
        handle.close()


def write(filename, content):
    """Replace the content of ``filename`` with the bytes ``content``."""
    handle = open(filename, 'wb')
    try:
        handle.write(content)
    finally:
        handle.close()


def rewrite_bytes_macros(source):
    """Return ``source`` with every ``bytes!()`` match rewritten.

    Returns None when the source contains no match at all.
    """
    result = BYTES_MACRO_RE.subn(rewrite_one_macro, source)
    return result[0] if result[1] > 0 else None


# Matches one `bytes!( ... )` invocation.  The pattern is compiled in verbose
# mode, so the spaces inside it are not significant.  The `args` group stops
# at the first `)`, even when that `)` sits inside a string literal.
BYTES_MACRO_RE = re.compile(br'bytes!\( (?P<args> [^)]* ) \)', re.VERBOSE)


def rewrite_one_macro(match):
    """Substitution callback: build the byte string literal for one match.

    When the arguments cannot be converted, report the invocation on
    stdout and return the matched text unchanged.
    """
    original = match.group(0)
    try:
        values = parse_bytes(split_args(match.group('args')))
        body = b''.join(escape(value) for value in values)
    except SkipThisRewrite:
        print('Skipped: %s' % original.decode('utf8', 'replace'))
        return original
    return b'b"' + body + b'"'


class SkipThisRewrite(Exception):
    """Raised when a ``bytes!()`` invocation must be left as it is."""


def split_args(args):
    """Split a macro argument list on commas, keeping quoted commas intact.

    A piece holding an odd number of double quotes ends inside a string
    literal, so it is glued to the following piece(s) until the quotes
    balance.  Whatever is still pending at the end is yielded as is.
    """
    pending = b''
    for piece in args.split(b','):
        candidate = pending + b',' + piece if pending else piece
        if candidate.count(b'"') % 2:
            # Unbalanced quotes: the comma just split on was inside a string.
            pending = candidate
            continue
        pending = b''
        yield candidate
    if pending:
        yield pending


def parse_bytes(args):
    """Yield the byte values (ints in 0..255) described by macro arguments.

    Each argument is either a quoted string/char literal, which contributes
    the bytes of its UTF-8 encoding, or an integer literal with an optional
    ``u8`` suffix, which contributes a single byte.

    Raises SkipThisRewrite for an argument that cannot be converted safely:
    a quoted literal containing a backslash-newline, or an integer that
    does not fit in one byte.
    """
    for arg in args:
        arg = arg.strip()
        if (arg.startswith(b'"') and arg.endswith(b'"')) or (
                arg.startswith(b"'") and arg.endswith(b"'")):
            # Escaped newline means something different in Rust and Python.
            if b'\\\n' in arg:
                raise SkipThisRewrite
            # NOTE(review): eval() executes text taken from the input file;
            # only run this script on sources you trust.
            text = eval(b'u' + arg)
            # Iterating a bytearray yields ints on both Python 2 and 3.
            for value in bytearray(text.encode('utf8')):
                yield value
        else:
            if arg.endswith(b'u8'):
                arg = arg[:-2]
            # Assume that all Rust integer literals
            # are valid Python integer literals
            value = int(eval(arg))
            # Explicit check rather than `assert`: it still runs under
            # `python -O`, rejects negative values, and skips only this
            # macro instead of aborting the whole run.
            if not 0 <= value <= 0xFF:
                raise SkipThisRewrite
            yield value


def escape(byte):
    """Return the source text for one byte inside a ``b"..."`` literal.

    ``byte`` is an int in 0..255.  Control characters with a short escape,
    both quote characters and the backslash are backslash-escaped; other
    printable ASCII bytes are returned as themselves; everything else
    becomes a ``\\xNN`` escape.
    """
    # One-byte bytes object on both Python 2 and Python 3.
    c = bytes(bytearray([byte]))
    escaped = {
        b'\0': br'\0',
        b'\t': br'\t',
        b'\n': br'\n',
        b'\r': br'\r',
        # The generated literal is delimited by double quotes, so a raw
        # `"` would terminate it early.
        b'"': b'\\"',
        b'\'': b'\\\'',
        b'\\': br'\\',
    }.get(c)
    if escaped is not None:
        return escaped
    elif b' ' <= c <= b'~':
        return c
    else:
        return ('\\x%02X' % byte).encode('ascii')


# On Python 3 `str` and `bytes` are distinct types, so `ord` and `chr` are
# replaced with versions that work on byte values.
if str is not bytes:
    # Python 3.x
    def ord(x):
        return x

    def chr(x):
        return bytes([x])


# Run the command-line interface only when executed as a script,
# not when imported as a module.
if __name__ == '__main__':
    main()
32 changes: 16 additions & 16 deletions src/libcollections/slice.rs
Expand Up @@ -1957,30 +1957,30 @@ mod tests {

#[test]
fn test_starts_with() {
assert!(bytes!("foobar").starts_with(bytes!("foo")));
assert!(!bytes!("foobar").starts_with(bytes!("oob")));
assert!(!bytes!("foobar").starts_with(bytes!("bar")));
assert!(!bytes!("foo").starts_with(bytes!("foobar")));
assert!(!bytes!("bar").starts_with(bytes!("foobar")));
assert!(bytes!("foobar").starts_with(bytes!("foobar")));
assert!(b"foobar".starts_with(b"foo"));
assert!(!b"foobar".starts_with(b"oob"));
assert!(!b"foobar".starts_with(b"bar"));
assert!(!b"foo".starts_with(b"foobar"));
assert!(!b"bar".starts_with(b"foobar"));
assert!(b"foobar".starts_with(b"foobar"));
let empty: &[u8] = [];
assert!(empty.starts_with(empty));
assert!(!empty.starts_with(bytes!("foo")));
assert!(bytes!("foobar").starts_with(empty));
assert!(!empty.starts_with(b"foo"));
assert!(b"foobar".starts_with(empty));
}

#[test]
fn test_ends_with() {
assert!(bytes!("foobar").ends_with(bytes!("bar")));
assert!(!bytes!("foobar").ends_with(bytes!("oba")));
assert!(!bytes!("foobar").ends_with(bytes!("foo")));
assert!(!bytes!("foo").ends_with(bytes!("foobar")));
assert!(!bytes!("bar").ends_with(bytes!("foobar")));
assert!(bytes!("foobar").ends_with(bytes!("foobar")));
assert!(b"foobar".ends_with(b"bar"));
assert!(!b"foobar".ends_with(b"oba"));
assert!(!b"foobar".ends_with(b"foo"));
assert!(!b"foo".ends_with(b"foobar"));
assert!(!b"bar".ends_with(b"foobar"));
assert!(b"foobar".ends_with(b"foobar"));
let empty: &[u8] = [];
assert!(empty.ends_with(empty));
assert!(!empty.ends_with(bytes!("foo")));
assert!(bytes!("foobar").ends_with(empty));
assert!(!empty.ends_with(b"foo"));
assert!(b"foobar".ends_with(empty));
}

#[test]
Expand Down
52 changes: 26 additions & 26 deletions src/libcollections/str.rs
Expand Up @@ -382,7 +382,7 @@ static TAG_CONT_U8: u8 = 128u8;
/// # Example
///
/// ```rust
/// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
/// let input = b"Hello \xF0\x90\x80World";
/// let output = std::str::from_utf8_lossy(input);
/// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
/// ```
Expand All @@ -391,7 +391,7 @@ pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
return Slice(unsafe { mem::transmute(v) })
}

static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
let mut i = 0;
let total = v.len();
fn unsafe_get(xs: &[u8], i: uint) -> u8 {
Expand Down Expand Up @@ -994,7 +994,7 @@ mod tests {
fn test_into_bytes() {
let data = "asdf".to_string();
let buf = data.into_bytes();
assert_eq!(bytes!("asdf"), buf.as_slice());
assert_eq!(b"asdf", buf.as_slice());
}

#[test]
Expand Down Expand Up @@ -2050,58 +2050,58 @@ mod tests {

#[test]
fn test_str_from_utf8() {
let xs = bytes!("hello");
let xs = b"hello";
assert_eq!(from_utf8(xs), Some("hello"));

let xs = bytes!("ศไทย中华Việt Nam");
let xs = "ศไทย中华Việt Nam".as_bytes();
assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));

let xs = bytes!("hello", 0xff);
let xs = b"hello\xFF";
assert_eq!(from_utf8(xs), None);
}

#[test]
fn test_str_from_utf8_owned() {
let xs = Vec::from_slice(bytes!("hello"));
let xs = Vec::from_slice(b"hello");
assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));

let xs = Vec::from_slice(bytes!("ศไทย中华Việt Nam"));
let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));

let xs = Vec::from_slice(bytes!("hello", 0xff));
let xs = Vec::from_slice(b"hello\xFF");
assert_eq!(from_utf8_owned(xs),
Err(Vec::from_slice(bytes!("hello", 0xff))));
Err(Vec::from_slice(b"hello\xFF")));
}

#[test]
fn test_str_from_utf8_lossy() {
let xs = bytes!("hello");
let xs = b"hello";
assert_eq!(from_utf8_lossy(xs), Slice("hello"));

let xs = bytes!("ศไทย中华Việt Nam");
let xs = "ศไทย中华Việt Nam".as_bytes();
assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));

let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
let xs = b"Hello\xC2 There\xFF Goodbye";
assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));

let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));

let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
let xs = b"\xF5foo\xF5\x80bar";
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));

let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));

let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));

let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
foo\U00010000bar".to_string()));

// surrogates
let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
\uFFFD\uFFFD\uFFFDbar".to_string()));
}
Expand Down Expand Up @@ -2298,8 +2298,8 @@ mod bench {
#[bench]
fn is_utf8_100_ascii(b: &mut Bencher) {

let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
Lorem ipsum dolor sit amet, consectetur. ");
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
Lorem ipsum dolor sit amet, consectetur. ";

assert_eq!(100, s.len());
b.iter(|| {
Expand All @@ -2309,7 +2309,7 @@ mod bench {

#[bench]
fn is_utf8_100_multibyte(b: &mut Bencher) {
let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
assert_eq!(100, s.len());
b.iter(|| {
is_utf8(s)
Expand All @@ -2318,8 +2318,8 @@ mod bench {

#[bench]
fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
Lorem ipsum dolor sit amet, consectetur. ");
let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
Lorem ipsum dolor sit amet, consectetur. ";

assert_eq!(100, s.len());
b.iter(|| {
Expand All @@ -2329,7 +2329,7 @@ mod bench {

#[bench]
fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
assert_eq!(100, s.len());
b.iter(|| {
let _ = from_utf8_lossy(s);
Expand All @@ -2338,7 +2338,7 @@ mod bench {

#[bench]
fn from_utf8_lossy_invalid(b: &mut Bencher) {
let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
b.iter(|| {
let _ = from_utf8_lossy(s);
});
Expand Down

0 comments on commit 108b8b6

Please sign in to comment.