Skip to content

Commit

Permalink
[Yaml] Improved support for double quoted values.
Browse files Browse the repository at this point in the history
Added support for the full range of escaped values in double quoted
strings in chapter 5 of the YAML 1.1 and 1.2 specs. The escaping
and unescaping strategies were factored out into separate classes to
keep the logic isolated.

Added examples from the spec to the unit tests for all escaped values.
  • Loading branch information
lewinski committed Mar 5, 2011
1 parent 727326b commit ed338d9
Show file tree
Hide file tree
Showing 5 changed files with 377 additions and 8 deletions.
88 changes: 88 additions & 0 deletions src/Symfony/Component/Yaml/Escaper.php
@@ -0,0 +1,88 @@
<?php

/*
* This file is part of the Symfony package.
* (c) Fabien Potencier <fabien.potencier@symfony-project.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\Yaml;

/**
* Escaper encapsulates escaping rules for single and double-quoted
* YAML strings.
*
* @author Matthew Lewinski <matthew@lewinski.org>
*/
class Escaper
{
    // Regex fragment matching the characters that force a dumped string to be
    // double quoted: the C0 control characters plus the UTF-8 byte sequences
    // for U+0085, U+00A0, U+2028 and U+2029.
    const REGEX_CHARACTER_TO_ESCAPE = "[\\x00-\\x1f]|\xc2\x85|\xc2\xa0|\xe2\x80\xa8|\xe2\x80\xa9";

    // Mapping arrays for escaping a double quoted string. $escapees holds the
    // RAW characters to look for and $escaped the escape sequence emitted at
    // the same index. The backslash must stay first: str_replace processes
    // the pairs one after another, so escaping the backslash later would
    // double the backslashes introduced by the other replacements. This
    // ordering avoids the use of strtr, which performs more slowly.
    static private $escapees = array('\\', '"',
                                     "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
                                     "\x08", "\x09", "\x0a", "\x0b", "\x0c", "\x0d", "\x0e", "\x0f",
                                     "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17",
                                     "\x18", "\x19", "\x1a", "\x1b", "\x1c", "\x1d", "\x1e", "\x1f",
                                     "\xc2\x85", "\xc2\xa0", "\xe2\x80\xa8", "\xe2\x80\xa9");
    static private $escaped = array('\\\\', '\\"',
                                    "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a",
                                    "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "\\x0e", "\\x0f",
                                    "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
                                    "\\x18", "\\x19", "\\x1a", "\\e", "\\x1c", "\\x1d", "\\x1e", "\\x1f",
                                    "\\N", "\\_", "\\L", "\\P");

    /**
     * Determines if a PHP value would require double quoting in YAML.
     *
     * The value needs double quotes as soon as it contains one of the
     * characters described by REGEX_CHARACTER_TO_ESCAPE.
     *
     * @param string $value A PHP value
     *
     * @return integer The preg_match() result: 1 if the value would require
     *                 double quotes, 0 otherwise (usable as a Boolean).
     */
    static public function requiresDoubleQuoting($value)
    {
        return preg_match('/'.self::REGEX_CHARACTER_TO_ESCAPE.'/u', $value);
    }

    /**
     * Escapes and surrounds a PHP value with double quotes.
     *
     * Backslashes and double quotes are backslash-escaped, and every
     * character listed in $escapees is replaced by its escape sequence.
     *
     * @param string $value A PHP value
     *
     * @return string The quoted, escaped string
     */
    static public function escapeWithDoubleQuotes($value)
    {
        return sprintf('"%s"', str_replace(self::$escapees, self::$escaped, $value));
    }

    /**
     * Determines if a PHP value would require single quoting in YAML.
     *
     * The value needs single quotes when it contains whitespace or one of
     * ' " : { } [ ] , & * # ? anywhere, or when it starts with one of
     * - ? | < > = ! % @ ` (or a space).
     *
     * @param string $value A PHP value
     *
     * @return integer The preg_match() result: 1 if the value would require
     *                 single quotes, 0 otherwise (usable as a Boolean).
     */
    static public function requiresSingleQuoting($value)
    {
        // The "x" modifier does not ignore whitespace inside a character
        // class, so the hyphen must be escaped; left bare between two spaces
        // it would be read as a (space-to-space) range instead of a literal.
        return preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ \- ? | < > = ! % @ ` ]/x', $value);
    }

    /**
     * Escapes and surrounds a PHP value with single quotes.
     *
     * Inside a single-quoted YAML string the only escape is the doubled
     * single quote.
     *
     * @param string $value A PHP value
     *
     * @return string The quoted, escaped string
     */
    static public function escapeWithSingleQuotes($value)
    {
        return sprintf("'%s'", str_replace('\'', '\'\'', $value));
    }
}
15 changes: 7 additions & 8 deletions src/Symfony/Component/Yaml/Inline.php
Expand Up @@ -88,10 +88,10 @@ static public function dump($value)
return is_string($value) ? "'$value'" : (int) $value;
case is_numeric($value):
return is_infinite($value) ? str_ireplace('INF', '.Inf', strval($value)) : (is_string($value) ? "'$value'" : $value);
case false !== strpos($value, "\n") || false !== strpos($value, "\r"):
return sprintf('"%s"', str_replace(array('"', "\n", "\r"), array('\\"', '\n', '\r'), $value));
case preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ - ? | < > = ! % @ ` ]/x', $value):
return sprintf("'%s'", str_replace('\'', '\'\'', $value));
case Escaper::requiresDoubleQuoting($value):
return Escaper::escapeWithDoubleQuotes($value);
case Escaper::requiresSingleQuoting($value):
return Escaper::escapeWithSingleQuotes($value);
case '' == $value:
return "''";
case preg_match(self::getTimestampRegex(), $value):
Expand Down Expand Up @@ -197,12 +197,11 @@ static protected function parseQuotedScalar($scalar, &$i)

$output = substr($match[0], 1, strlen($match[0]) - 2);

$unescaper = new Unescaper();
if ('"' == $scalar[$i]) {
// evaluate the string
$output = str_replace(array('\\"', '\\n', '\\r'), array('"', "\n", "\r"), $output);
$output = $unescaper->unescapeDoubleQuotedString($output);
} else {
// unescape '
$output = str_replace('\'\'', '\'', $output);
$output = $unescaper->unescapeSingleQuotedString($output);
}

$i += strlen($match[0]);
Expand Down
142 changes: 142 additions & 0 deletions src/Symfony/Component/Yaml/Unescaper.php
@@ -0,0 +1,142 @@
<?php

/*
* This file is part of the Symfony package.
* (c) Fabien Potencier <fabien.potencier@symfony-project.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\Yaml;

/**
* Unescaper encapsulates unescaping rules for single and double-quoted
* YAML strings.
*
* @author Matthew Lewinski <matthew@lewinski.org>
*/
class Unescaper
{
    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
    // must be converted to that encoding.
    const ENCODING = 'UTF-8';

    // Regex fragment that matches an escaped character in a double quoted
    // string: a backslash followed by a single-character escape, or by
    // x/u/U and exactly 2/4/8 hexadecimal digits.
    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

    /**
     * Unescapes a single quoted string.
     *
     * The only escape in a single quoted string is the doubled single quote.
     *
     * @param string $value A single quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeSingleQuotedString($value)
    {
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * result of unescapeCharacter(); everything else is left untouched.
     *
     * @param string $value A double quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeDoubleQuotedString($value)
    {
        // $this is passed explicitly so the closure works without automatic
        // $this binding.
        $self = $this;
        $callback = function($match) use($self) {
            return $self->unescapeCharacter($match[0]);
        };

        // evaluate the string
        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
    }

    /**
     * Unescapes a character that was found in a double-quoted string
     *
     * The character following the backslash selects the replacement. No
     * value is returned for a sequence that is not handled below.
     *
     * @param string $value An escaped character (backslash included)
     *
     * @return string The unescaped character
     */
    public function unescapeCharacter($value)
    {
        // Square-bracket offset: the curly-brace form ($value{1}) is a parse
        // error on PHP 8.
        switch ($value[1]) {
            case '0':
                return "\x00";
            case 'a':
                return "\x07";
            case 'b':
                return "\x08";
            case 't':
                return "\t";
            case "\t":
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\x0b";
            case 'f':
                return "\x0c";
            case 'r':
                return "\x0d";
            case 'e':
                return "\x1b";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
            case '_':
                // U+00A0 NO-BREAK SPACE
                return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
            case 'L':
                // U+2028 LINE SEPARATOR
                return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
            case 'x':
                // Two hex digits, packed as a 16-bit big-endian code unit.
                $char = pack('n', hexdec(substr($value, 2, 2)));
                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'u':
                // Four hex digits, packed as a 16-bit big-endian code unit.
                $char = pack('n', hexdec(substr($value, 2, 4)));
                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'U':
                // Eight hex digits, packed as a 32-bit big-endian code unit.
                $char = pack('N', hexdec(substr($value, 2, 8)));
                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
        }
    }

    /**
     * Convert a string from one encoding to another.
     *
     * iconv is used when available, mbstring otherwise.
     *
     * @param string $value The string to convert
     * @param string $to    The output encoding
     * @param string $from  The input encoding
     *
     * @return string The string with the new encoding
     *
     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
     */
    protected function convertEncoding($value, $to, $from)
    {
        if (function_exists('iconv')) {
            return iconv($from, $to, $value);
        } elseif (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($value, $to, $from);
        }

        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
    }
}

0 comments on commit ed338d9

Please sign in to comment.