Skip to content

Commit

Permalink
#20411 fix Yaml parsing for very long quoted strings
Browse files Browse the repository at this point in the history
  • Loading branch information
RichardBradley authored and fabpot committed Mar 17, 2017
1 parent 01a0250 commit c9a1c09
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 39 deletions.
16 changes: 8 additions & 8 deletions src/Symfony/Component/Yaml/Inline.php
Expand Up @@ -149,8 +149,8 @@ public static function dump($value, $exceptionOnInvalidType = false, $objectSupp
case Escaper::requiresDoubleQuoting($value):
return Escaper::escapeWithDoubleQuotes($value);
case Escaper::requiresSingleQuoting($value):
case preg_match(self::getHexRegex(), $value):
case preg_match(self::getTimestampRegex(), $value):
case Parser::preg_match(self::getHexRegex(), $value):
case Parser::preg_match(self::getTimestampRegex(), $value):
return Escaper::escapeWithSingleQuotes($value);
default:
return $value;
Expand Down Expand Up @@ -242,10 +242,10 @@ public static function parseScalar($scalar, $delimiters = null, $stringDelimiter
$i += strlen($output);

// remove comments
if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
$output = substr($output, 0, $match[0][1]);
}
} elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
} elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
$output = $match[1];
$i += strlen($output);
} else {
Expand All @@ -272,7 +272,7 @@ public static function parseScalar($scalar, $delimiters = null, $stringDelimiter
*/
private static function parseQuotedScalar($scalar, &$i)
{
if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
}

Expand Down Expand Up @@ -520,16 +520,16 @@ private static function evaluateScalar($scalar, $references = array())

return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
case is_numeric($scalar):
case preg_match(self::getHexRegex(), $scalar):
case Parser::preg_match(self::getHexRegex(), $scalar):
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
case '.inf' === $scalarLower:
case '.nan' === $scalarLower:
return -log(0);
case '-.inf' === $scalarLower:
return log(0);
case preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
case Parser::preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
return (float) str_replace(',', '', $scalar);
case preg_match(self::getTimestampRegex(), $scalar):
case Parser::preg_match(self::getTimestampRegex(), $scalar):
$timeZone = date_default_timezone_get();
date_default_timezone_set('UTC');
$time = strtotime($scalar);
Expand Down
88 changes: 57 additions & 31 deletions src/Symfony/Component/Yaml/Parser.php
Expand Up @@ -61,7 +61,7 @@ public function __construct($offset = 0, $totalNumberOfLines = null, array $skip
*/
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
{
if (!preg_match('//u', $value)) {
if (false === preg_match('//u', $value)) {
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
}
$this->currentLineNb = -1;
Expand Down Expand Up @@ -92,13 +92,13 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
}

$isRef = $mergeNode = false;
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
if ($context && 'mapping' == $context) {
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
}
$context = 'sequence';

if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
$isRef = $matches['ref'];
$values['value'] = $matches['value'];
}
Expand All @@ -108,7 +108,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
} else {
if (isset($values['leadspaces'])
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
&& self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches)
) {
// this is a compact notation element, add to next block and parse
$block = $values['value'];
Expand All @@ -124,7 +124,10 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
if ($isRef) {
$this->refs[$isRef] = end($data);
}
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) {
} elseif (
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
) {
if ($context && 'sequence' == $context) {
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
}
Expand Down Expand Up @@ -203,7 +206,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
}
}
}
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
} elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
$isRef = $matches['ref'];
$values['value'] = $matches['value'];
}
Expand Down Expand Up @@ -266,27 +269,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
return $value;
}

switch (preg_last_error()) {
case PREG_INTERNAL_ERROR:
$error = 'Internal PCRE error.';
break;
case PREG_BACKTRACK_LIMIT_ERROR:
$error = 'pcre.backtrack_limit reached.';
break;
case PREG_RECURSION_LIMIT_ERROR:
$error = 'pcre.recursion_limit reached.';
break;
case PREG_BAD_UTF8_ERROR:
$error = 'Malformed UTF-8 data.';
break;
case PREG_BAD_UTF8_OFFSET_ERROR:
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
break;
default:
$error = 'Unable to parse.';
}

throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine);
}
}

Expand Down Expand Up @@ -520,7 +503,7 @@ private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $ob
return $this->refs[$value];
}

if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
Expand Down Expand Up @@ -566,7 +549,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)

// determine indentation if not specified
if (0 === $indentation) {
if (preg_match('/^ +/', $this->currentLine, $matches)) {
if (self::preg_match('/^ +/', $this->currentLine, $matches)) {
$indentation = strlen($matches[0]);
}
}
Expand All @@ -577,7 +560,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)
while (
$notEOF && (
$isCurrentLineBlank ||
preg_match($pattern, $this->currentLine, $matches)
self::preg_match($pattern, $this->currentLine, $matches)
)
) {
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
Expand Down Expand Up @@ -800,6 +783,49 @@ private function isStringUnIndentedCollectionItem()
*/
private function isBlockScalarHeader()
{
return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
}

/**
 * A local wrapper for `preg_match` which will throw a ParseException if there
 * is an internal error in the PCRE engine.
 *
 * This avoids us needing to check for "false" every time PCRE is used
 * in the YAML engine.
 *
 * @param string     $pattern  The regular expression, handed to preg_match() unchanged
 * @param string     $subject  The string to match against
 * @param array|null &$matches Filled by preg_match() with the match results
 * @param int        $flags    preg_match() flags (e.g. PREG_OFFSET_CAPTURE)
 * @param int        $offset   Offset in $subject at which matching starts
 *
 * @return int The non-false result of preg_match(): 1 on a match, 0 otherwise
 *
 * @throws ParseException on a PCRE internal error
 *
 * @see preg_last_error()
 *
 * @internal
 */
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0)
{
    $ret = preg_match($pattern, $subject, $matches, $flags, $offset);
    if (false !== $ret) {
        return $ret;
    }

    // preg_match() only returns false when the engine itself failed;
    // translate the PCRE error code into a readable message.
    $code = preg_last_error();
    switch ($code) {
        case PREG_INTERNAL_ERROR:
            $error = 'Internal PCRE error.';
            break;
        case PREG_BACKTRACK_LIMIT_ERROR:
            $error = 'pcre.backtrack_limit reached.';
            break;
        case PREG_RECURSION_LIMIT_ERROR:
            $error = 'pcre.recursion_limit reached.';
            break;
        case PREG_BAD_UTF8_ERROR:
            $error = 'Malformed UTF-8 data.';
            break;
        case PREG_BAD_UTF8_OFFSET_ERROR:
            $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
            break;
        default:
            // PREG_JIT_STACKLIMIT_ERROR only exists on PHP >= 7.0, so it is
            // checked behind defined() instead of being used as a case label.
            if (defined('PREG_JIT_STACKLIMIT_ERROR') && PREG_JIT_STACKLIMIT_ERROR === $code) {
                $error = 'PCRE JIT stack limit exhausted.';
            } else {
                $error = 'Error.';
            }
    }

    throw new ParseException($error);
}
}
12 changes: 12 additions & 0 deletions src/Symfony/Component/Yaml/Tests/ParserTest.php
Expand Up @@ -16,6 +16,7 @@

class ParserTest extends \PHPUnit_Framework_TestCase
{
/** @var Parser */
protected $parser;

protected function setUp()
Expand Down Expand Up @@ -1143,6 +1144,17 @@ public function parserThrowsExceptionWithCorrectLineNumberProvider()
),
);
}

/**
 * Dumps a mapping holding one very long, space-separated string value and
 * checks that parsing the resulting YAML yields the original mapping again.
 */
public function testCanParseVeryLongValue()
{
    $expected = array(
        'x' => str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000),
    );

    // Round-trip: dump to YAML, then feed that YAML straight back to the parser.
    $parsed = $this->parser->parse(Yaml::dump($expected));

    $this->assertEquals($expected, $parsed);
}
}

class B
Expand Down

0 comments on commit c9a1c09

Please sign in to comment.