diff --git a/src/Symfony/Component/Yaml/Escaper.php b/src/Symfony/Component/Yaml/Escaper.php new file mode 100644 index 000000000000..372dbaf47458 --- /dev/null +++ b/src/Symfony/Component/Yaml/Escaper.php @@ -0,0 +1,88 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\Yaml; + +/** + * Escaper encapsulates escaping rules for single and double-quoted + * YAML strings. + * + * @author Matthew Lewinski + */ +class Escaper +{ + // Characters that would cause a dumped string to require double quoting. + const REGEX_CHARACTER_TO_ESCAPE = "[\\x00-\\x1f]|\xc2\x85|\xc2\xa0|\xe2\x80\xa8|\xe2\x80\xa9"; + + // Mapping arrays for escaping a double quoted string. The backslash is + // first to ensure proper escaping because str_replace operates iteratively + // on the input arrays. This ordering of the characters avoids the use of strtr, + // which performs more slowly. + static private $escapees = array('\\\\', '\\"', + "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", + "\x08", "\x09", "\x0a", "\x0b", "\x0c", "\x0d", "\x0e", "\x0f", + "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17", + "\x18", "\x19", "\x1a", "\x1b", "\x1c", "\x1d", "\x1e", "\x1f", + "\xc2\x85", "\xc2\xa0", "\xe2\x80\xa8", "\xe2\x80\xa9"); + static private $escaped = array('\\"', '\\\\', + "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a", + "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "\\x0e", "\\x0f", + "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", + "\\x18", "\\x19", "\\x1a", "\\e", "\\x1c", "\\x1d", "\\x1e", "\\x1f", + "\\N", "\\_", "\\L", "\\P"); + + /** + * Determines if a PHP value would require double quoting in YAML. + * + * @param string $value A PHP value + * + * @return Boolean True if the value would require double quotes. + */ + static public function requiresDoubleQuoting($value) + { + return preg_match('/'.self::REGEX_CHARACTER_TO_ESCAPE.'/u', $value); + } + + /** + * Escapes and surrounds a PHP value with double quotes. + * + * @param string $value A PHP value + * + * @return string The quoted, escaped string + */ + static public function escapeWithDoubleQuotes($value) + { + return sprintf('"%s"', str_replace(self::$escapees, self::$escaped, $value)); + } + + /** + * Determines if a PHP value would require single quoting in YAML. + * + * @param string $value A PHP value + * + * @return Boolean True if the value would require single quotes. + */ + static public function requiresSingleQuoting($value) + { + return preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ - ? | < > = ! % @ ` ]/x', $value); + } + + /** + * Escapes and surrounds a PHP value with single quotes. + * + * @param string $value A PHP value + * + * @return string The quoted, escaped string + */ + static public function escapeWithSingleQuotes($value) + { + return sprintf("'%s'", str_replace('\'', '\'\'', $value)); + } +} diff --git a/src/Symfony/Component/Yaml/Inline.php b/src/Symfony/Component/Yaml/Inline.php index c4a2cd4f2d6a..c515be05097d 100644 --- a/src/Symfony/Component/Yaml/Inline.php +++ b/src/Symfony/Component/Yaml/Inline.php @@ -88,10 +88,10 @@ static public function dump($value) return is_string($value) ? "'$value'" : (int) $value; case is_numeric($value): return is_infinite($value) ? str_ireplace('INF', '.Inf', strval($value)) : (is_string($value) ? "'$value'" : $value); - case false !== strpos($value, "\n") || false !== strpos($value, "\r"): - return sprintf('"%s"', str_replace(array('"', "\n", "\r"), array('\\"', '\n', '\r'), $value)); - case preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ - ? | < > = ! % @ ` ]/x', $value): - return sprintf("'%s'", str_replace('\'', '\'\'', $value)); + case Escaper::requiresDoubleQuoting($value): + return Escaper::escapeWithDoubleQuotes($value); + case Escaper::requiresSingleQuoting($value): + return Escaper::escapeWithSingleQuotes($value); case '' == $value: return "''"; case preg_match(self::getTimestampRegex(), $value): @@ -197,12 +197,11 @@ static protected function parseQuotedScalar($scalar, &$i) $output = substr($match[0], 1, strlen($match[0]) - 2); + $unescaper = new Unescaper(); if ('"' == $scalar[$i]) { - // evaluate the string - $output = str_replace(array('\\"', '\\n', '\\r'), array('"', "\n", "\r"), $output); + $output = $unescaper->unescapeDoubleQuotedString($output); } else { - // unescape ' - $output = str_replace('\'\'', '\'', $output); + $output = $unescaper->unescapeSingleQuotedString($output); } $i += strlen($match[0]); diff --git a/src/Symfony/Component/Yaml/Unescaper.php b/src/Symfony/Component/Yaml/Unescaper.php new file mode 100644 index 000000000000..d7969cd48fa3 --- /dev/null +++ b/src/Symfony/Component/Yaml/Unescaper.php @@ -0,0 +1,142 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\Yaml; + +/** + * Unescaper encapsulates unescaping rules for single and double-quoted + * YAML strings. + * + * @author Matthew Lewinski + */ +class Unescaper +{ + // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters + // must be converted to that encoding. + const ENCODING = 'UTF-8'; + + // Regex fragment that matches an escaped character in a double quoted + // string. + const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})"; + + /** + * Unescapes a single quoted string. + * + * @param string $value A single quoted string. + * + * @return string The unescaped string. + */ + public function unescapeSingleQuotedString($value) + { + return str_replace('\'\'', '\'', $value); + } + + /** + * Unescapes a double quoted string. + * + * @param string $value A double quoted string. + * + * @return string The unescaped string. + */ + public function unescapeDoubleQuotedString($value) + { + $self = $this; + $callback = function($match) use($self) { + return $self->unescapeCharacter($match[0]); + }; + + // evaluate the string + return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value); + } + + /** + * Unescapes a character that was found in a double-quoted string + * + * @param string $value An escaped character + * + * @return string The unescaped character + */ + public function unescapeCharacter($value) + { + switch ($value{1}) { + case '0': + return "\x0"; + case 'a': + return "\x7"; + case 'b': + return "\x8"; + case 't': + return "\t"; + case "\t": + return "\t"; + case 'n': + return "\n"; + case 'v': + return "\xb"; + case 'f': + return "\xc"; + case 'r': + return "\xd"; + case 'e': + return "\x1b"; + case ' ': + return ' '; + case '"': + return '"'; + case '/': + return '/'; + case '\\': + return '\\'; + case 'N': + // U+0085 NEXT LINE + return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE'); + case '_': + // U+00A0 NO-BREAK SPACE + return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE'); + case 'L': + // U+2028 LINE SEPARATOR + return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE'); + case 'P': + // U+2029 PARAGRAPH SEPARATOR + return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE'); + case 'x': + $char = pack('n', hexdec(substr($value, 2, 2))); + return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE'); + case 'u': + $char = pack('n', hexdec(substr($value, 2, 4))); + return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE'); + case 'U': + $char = pack('N', hexdec(substr($value, 2, 8))); + return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE'); + } + } + + /** + * Convert a string from one encoding to another. + * + * @param string $string The string to convert + * @param string $to The input encoding + * @param string $from The output encoding + * + * @return string The string with the new encoding + * + * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring) + */ + protected function convertEncoding($value, $to, $from) + { + if (function_exists('iconv')) { + return iconv($from, $to, $value); + } elseif (function_exists('mb_convert_encoding')) { + return mb_convert_encoding($value, $to, $from); + } + + throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).'); + } +} diff --git a/tests/Symfony/Tests/Component/Yaml/Fixtures/escapedCharacters.yml b/tests/Symfony/Tests/Component/Yaml/Fixtures/escapedCharacters.yml new file mode 100644 index 000000000000..7f17ed5c12cd --- /dev/null +++ b/tests/Symfony/Tests/Component/Yaml/Fixtures/escapedCharacters.yml @@ -0,0 +1,139 @@ +test: outside double quotes +yaml: | + \0 \ \a \b \n +php: | + "\\0 \\ \\a \\b \\n" +--- +test: null +yaml: | + "\0" +php: | + "\x00" +--- +test: bell +yaml: | + "\a" +php: | + "\x07" +--- +test: backspace +yaml: | + "\b" +php: | + "\x08" +--- +test: horizontal tab (1) +yaml: | + "\t" +php: | + "\x09" +--- +test: horizontal tab (2) +yaml: | + "\ " +php: | + "\x09" +--- +test: line feed +yaml: | + "\n" +php: | + "\x0a" +--- +test: vertical tab +yaml: | + "\v" +php: | + "\x0b" +--- +test: form feed +yaml: | + "\f" +php: | + "\x0c" +--- +test: carriage return +yaml: | + "\r" +php: | + "\x0d" +--- +test: escape +yaml: | + "\e" +php: | + "\x1b" +--- +test: space +yaml: | + "\ " +php: | + "\x20" +--- +test: slash +yaml: | + "\/" +php: | + "\x2f" +--- +test: backslash +yaml: | + "\\" +php: | + "\\" +--- +test: Unicode next line +yaml: | + "\N" +php: | + "\xc2\x85" +--- +test: Unicode non-breaking space +yaml: | + "\_" +php: | + "\xc2\xa0" +--- +test: Unicode line separator +yaml: | + "\L" +php: | + "\xe2\x80\xa8" +--- +test: Unicode paragraph separator +yaml: | + "\P" +php: | + "\xe2\x80\xa9" +--- +test: Escaped 8-bit Unicode +yaml: | + "\x42" +php: | + "B" +--- +test: Escaped 16-bit Unicode +yaml: | + "\u20ac" +php: | + "\xe2\x82\xac" +--- +test: Escaped 32-bit Unicode +yaml: | + "\U00000043" +php: | + "C" +--- +test: Example 5.13 Escaped Characters +note: | + Currently throws an error parsing first line. Maybe Symfony Yaml doesn't support + continuation of string across multiple lines? Keeping test here but disabled. +todo: true +yaml: | + "Fun with \\ + \" \a \b \e \f \ + \n \r \t \v \0 \ + \ \_ \N \L \P \ + \x41 \u0041 \U00000041" +php: | + "Fun with \x5C\n\x22 \x07 \x08 \x1B \x0C\n\x0A \x0D \x09 \x0B \x00\n\x20 \xA0 \x85 \xe2\x80\xa8 \xe2\x80\xa9\nA A A" diff --git a/tests/Symfony/Tests/Component/Yaml/Fixtures/index.yml b/tests/Symfony/Tests/Component/Yaml/Fixtures/index.yml index dea165fe1c7d..a1d7f39e0b90 100644 --- a/tests/Symfony/Tests/Component/Yaml/Fixtures/index.yml +++ b/tests/Symfony/Tests/Component/Yaml/Fixtures/index.yml @@ -1,3 +1,4 @@ +- escapedCharacters - sfComments - sfCompact - sfTests