Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Yaml] Improved support for double quoted values.
Added support for the full range of escaped values in double quoted strings in chapter 5 of the YAML 1.1 and 1.2 specs. The escaping and unescaping strategies were factored out into separate classes to keep the logic isolated. Added examples from the spec to the unit tests for all escaped values.
- Loading branch information
Showing
5 changed files
with
377 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
<?php | ||
|
||
/* | ||
* This file is part of the Symfony package. | ||
* (c) Fabien Potencier <fabien.potencier@symfony-project.com> | ||
* | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
*/ | ||
|
||
namespace Symfony\Component\Yaml; | ||
|
||
/** | ||
* Escaper encapsulates escaping rules for single and double-quoted | ||
* YAML strings. | ||
* | ||
* @author Matthew Lewinski <matthew@lewinski.org> | ||
*/ | ||
class Escaper
{
    // Regex fragment matching the characters that force a dumped string to be
    // double quoted: C0 control characters plus the UTF-8 encodings of
    // U+0085 (NEL), U+00A0 (NBSP), U+2028 (LS) and U+2029 (PS).
    const REGEX_CHARACTER_TO_ESCAPE = "[\\x00-\\x1f]|\xc2\x85|\xc2\xa0|\xe2\x80\xa8|\xe2\x80\xa9";

    // Mapping arrays for escaping a double quoted string. str_replace applies
    // the pairs one after another, each on the result of the previous one, so
    // the backslash MUST come first: every later replacement introduces
    // backslashes that must not be escaped a second time. This ordering also
    // avoids the use of strtr, which performs more slowly.
    private static $escapees = array('\\', '"',
                                     "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
                                     "\x08", "\x09", "\x0a", "\x0b", "\x0c", "\x0d", "\x0e", "\x0f",
                                     "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17",
                                     "\x18", "\x19", "\x1a", "\x1b", "\x1c", "\x1d", "\x1e", "\x1f",
                                     "\xc2\x85", "\xc2\xa0", "\xe2\x80\xa8", "\xe2\x80\xa9");
    private static $escaped  = array('\\\\', '\\"',
                                     "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a",
                                     "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "\\x0e", "\\x0f",
                                     "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
                                     "\\x18", "\\x19", "\\x1a", "\\e", "\\x1c", "\\x1d", "\\x1e", "\\x1f",
                                     "\\N", "\\_", "\\L", "\\P");

    /**
     * Determines if a PHP value would require double quoting in YAML.
     *
     * @param string $value A PHP value
     *
     * @return Boolean True if the value contains a character that can only
     *                 be represented inside a double quoted scalar.
     */
    public static function requiresDoubleQuoting($value)
    {
        // "0 <" turns preg_match's 1/0/false result into the documented Boolean.
        return 0 < preg_match('/'.self::REGEX_CHARACTER_TO_ESCAPE.'/u', $value);
    }

    /**
     * Escapes and surrounds a PHP value with double quotes.
     *
     * Backslashes and double quotes are escaped, and every character matched
     * by REGEX_CHARACTER_TO_ESCAPE is replaced by its YAML escape sequence.
     *
     * @param string $value A PHP value
     *
     * @return string The quoted, escaped string
     */
    public static function escapeWithDoubleQuotes($value)
    {
        return sprintf('"%s"', str_replace(self::$escapees, self::$escaped, $value));
    }

    /**
     * Determines if a PHP value would require single quoting in YAML.
     *
     * @param string $value A PHP value
     *
     * @return Boolean True if the value contains whitespace, a quote or a
     *                 YAML indicator, or starts with an indicator character.
     */
    public static function requiresSingleQuoting($value)
    {
        // Whitespace inside a character class stays literal even with the x
        // modifier, so the leading "-" is escaped: an unescaped " - " would be
        // read as the range from space to space and never match a dash.
        return 0 < preg_match('/[ \s \' " \: \{ \} \[ \] , & \* \# \?] | \A[ \- ? | < > = ! % @ ` ]/x', $value);
    }

    /**
     * Escapes and surrounds a PHP value with single quotes.
     *
     * The only escape inside a single quoted YAML scalar is the doubled
     * single quote.
     *
     * @param string $value A PHP value
     *
     * @return string The quoted, escaped string
     */
    public static function escapeWithSingleQuotes($value)
    {
        return sprintf("'%s'", str_replace('\'', '\'\'', $value));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
<?php | ||
|
||
/* | ||
* This file is part of the Symfony package. | ||
* (c) Fabien Potencier <fabien.potencier@symfony-project.com> | ||
* | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
*/ | ||
|
||
namespace Symfony\Component\Yaml; | ||
|
||
/** | ||
* Unescaper encapsulates unescaping rules for single and double-quoted | ||
* YAML strings. | ||
* | ||
* @author Matthew Lewinski <matthew@lewinski.org> | ||
*/ | ||
class Unescaper
{
    // Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
    // must be converted to that encoding.
    const ENCODING = 'UTF-8';

    // Regex fragment that matches an escaped character in a double quoted
    // string: a backslash followed by a single-character escape, or by
    // x/u/U and 2/4/8 hexadecimal digits respectively.
    const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

    /**
     * Unescapes a single quoted string.
     *
     * @param string $value A single quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeSingleQuotedString($value)
    {
        // A doubled single quote is the only escape in this quoting style.
        return str_replace('\'\'', '\'', $value);
    }

    /**
     * Unescapes a double quoted string.
     *
     * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
     * result of unescapeCharacter(); all other text is left untouched.
     *
     * @param string $value A double quoted string.
     *
     * @return string The unescaped string.
     */
    public function unescapeDoubleQuotedString($value)
    {
        // $this is not available inside closures on PHP 5.3, so the instance
        // is passed in explicitly.
        $self = $this;
        $callback = function($match) use($self) {
            return $self->unescapeCharacter($match[0]);
        };

        // evaluate the string
        return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
    }

    /**
     * Unescapes a character that was found in a double-quoted string
     *
     * @param string $value An escaped character, including its leading backslash
     *
     * @return string The unescaped character (nothing is returned for an
     *                escape that is not listed below)
     */
    public function unescapeCharacter($value)
    {
        // Offset 1 is the escape code that follows the backslash. Square
        // brackets are used because the curly brace offset syntax is
        // deprecated in PHP 7.4 and rejected by PHP 8.
        switch ($value[1]) {
            case '0':
                return "\x0";
            case 'a':
                return "\x7";
            case 'b':
                return "\x8";
            case 't':
                return "\t";
            case "\t":
                return "\t";
            case 'n':
                return "\n";
            case 'v':
                return "\xb";
            case 'f':
                return "\xc";
            case 'r':
                return "\xd";
            case 'e':
                return "\x1b";
            case ' ':
                return ' ';
            case '"':
                return '"';
            case '/':
                return '/';
            case '\\':
                return '\\';
            case 'N':
                // U+0085 NEXT LINE
                return $this->convertEncoding("\x00\x85", self::ENCODING, 'UCS-2BE');
            case '_':
                // U+00A0 NO-BREAK SPACE
                return $this->convertEncoding("\x00\xA0", self::ENCODING, 'UCS-2BE');
            case 'L':
                // U+2028 LINE SEPARATOR
                return $this->convertEncoding("\x20\x28", self::ENCODING, 'UCS-2BE');
            case 'P':
                // U+2029 PARAGRAPH SEPARATOR
                return $this->convertEncoding("\x20\x29", self::ENCODING, 'UCS-2BE');
            case 'x':
                // Two hex digits, packed as a 16-bit big-endian code unit.
                $char = pack('n', hexdec(substr($value, 2, 2)));
                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'u':
                // Four hex digits, packed as a 16-bit big-endian code unit.
                $char = pack('n', hexdec(substr($value, 2, 4)));
                return $this->convertEncoding($char, self::ENCODING, 'UCS-2BE');
            case 'U':
                // Eight hex digits, packed as a 32-bit big-endian code unit.
                $char = pack('N', hexdec(substr($value, 2, 8)));
                return $this->convertEncoding($char, self::ENCODING, 'UCS-4BE');
        }
    }

    /**
     * Convert a string from one encoding to another.
     *
     * iconv is preferred; mbstring is used when iconv is not available.
     *
     * @param string $value The string to convert
     * @param string $to    The output encoding
     * @param string $from  The input encoding
     *
     * @return string The string with the new encoding
     *
     * @throws \RuntimeException if no suitable encoding function is found (iconv or mbstring)
     */
    protected function convertEncoding($value, $to, $from)
    {
        if (function_exists('iconv')) {
            return iconv($from, $to, $value);
        } elseif (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($value, $to, $from);
        }

        throw new \RuntimeException('No suitable convert encoding function (install the iconv or mbstring extension).');
    }
}
Oops, something went wrong.