Skip to content

Commit

Permalink
bug #21523 #20411 fix Yaml parsing for very long quoted strings (Rich…
Browse files Browse the repository at this point in the history
…ardBradley)

This PR was squashed before being merged into the 2.7 branch (closes #21523).

Discussion
----------

#20411 fix Yaml parsing for very long quoted strings

| Q             | A
| ------------- | ---
| Branch?       | 2.7
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #20411
| License       | MIT
| Doc PR        | no

This is a second fix for the issue discussed in #20411. My first PR (#21279) didn't fix the bug in all cases, sorry.

If a YAML string has too many spaces in the value, it can trigger a `PREG_BACKTRACK_LIMIT_ERROR` error in the Yaml parser.

There should be no behavioural change other than the bug fix

I have included a test which fails before this fix and passes after this fix.

I have also added checks that detect other PCRE internal errors and throw a more descriptive exception. Before this patch, the YAML engine would often give incorrect results, rather than throwing, on a PCRE `PREG_BACKTRACK_LIMIT_ERROR` error.

Commits
-------

c9a1c09 #20411 fix Yaml parsing for very long quoted strings
  • Loading branch information
fabpot committed Mar 17, 2017
2 parents ac109f1 + c9a1c09 commit ab1d938
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 39 deletions.
16 changes: 8 additions & 8 deletions src/Symfony/Component/Yaml/Inline.php
Expand Up @@ -149,8 +149,8 @@ public static function dump($value, $exceptionOnInvalidType = false, $objectSupp
case Escaper::requiresDoubleQuoting($value):
return Escaper::escapeWithDoubleQuotes($value);
case Escaper::requiresSingleQuoting($value):
case preg_match(self::getHexRegex(), $value):
case preg_match(self::getTimestampRegex(), $value):
case Parser::preg_match(self::getHexRegex(), $value):
case Parser::preg_match(self::getTimestampRegex(), $value):
return Escaper::escapeWithSingleQuotes($value);
default:
return $value;
Expand Down Expand Up @@ -242,10 +242,10 @@ public static function parseScalar($scalar, $delimiters = null, $stringDelimiter
$i += strlen($output);

// remove comments
if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
$output = substr($output, 0, $match[0][1]);
}
} elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
} elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
$output = $match[1];
$i += strlen($output);
} else {
Expand All @@ -272,7 +272,7 @@ public static function parseScalar($scalar, $delimiters = null, $stringDelimiter
*/
private static function parseQuotedScalar($scalar, &$i)
{
if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
}

Expand Down Expand Up @@ -520,16 +520,16 @@ private static function evaluateScalar($scalar, $references = array())

return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
case is_numeric($scalar):
case preg_match(self::getHexRegex(), $scalar):
case Parser::preg_match(self::getHexRegex(), $scalar):
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
case '.inf' === $scalarLower:
case '.nan' === $scalarLower:
return -log(0);
case '-.inf' === $scalarLower:
return log(0);
case preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
case Parser::preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
return (float) str_replace(',', '', $scalar);
case preg_match(self::getTimestampRegex(), $scalar):
case Parser::preg_match(self::getTimestampRegex(), $scalar):
$timeZone = date_default_timezone_get();
date_default_timezone_set('UTC');
$time = strtotime($scalar);
Expand Down
88 changes: 57 additions & 31 deletions src/Symfony/Component/Yaml/Parser.php
Expand Up @@ -61,7 +61,7 @@ public function __construct($offset = 0, $totalNumberOfLines = null, array $skip
*/
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
{
if (!preg_match('//u', $value)) {
if (false === preg_match('//u', $value)) {
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
}
$this->currentLineNb = -1;
Expand Down Expand Up @@ -92,13 +92,13 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
}

$isRef = $mergeNode = false;
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
if ($context && 'mapping' == $context) {
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
}
$context = 'sequence';

if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
$isRef = $matches['ref'];
$values['value'] = $matches['value'];
}
Expand All @@ -108,7 +108,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
} else {
if (isset($values['leadspaces'])
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
&& self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches)
) {
// this is a compact notation element, add to next block and parse
$block = $values['value'];
Expand All @@ -124,7 +124,10 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
if ($isRef) {
$this->refs[$isRef] = end($data);
}
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) {
} elseif (
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
) {
if ($context && 'sequence' == $context) {
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
}
Expand Down Expand Up @@ -191,7 +194,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
$data += $parsed; // array union
}
}
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
} elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
$isRef = $matches['ref'];
$values['value'] = $matches['value'];
}
Expand Down Expand Up @@ -254,27 +257,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
return $value;
}

switch (preg_last_error()) {
case PREG_INTERNAL_ERROR:
$error = 'Internal PCRE error.';
break;
case PREG_BACKTRACK_LIMIT_ERROR:
$error = 'pcre.backtrack_limit reached.';
break;
case PREG_RECURSION_LIMIT_ERROR:
$error = 'pcre.recursion_limit reached.';
break;
case PREG_BAD_UTF8_ERROR:
$error = 'Malformed UTF-8 data.';
break;
case PREG_BAD_UTF8_OFFSET_ERROR:
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
break;
default:
$error = 'Unable to parse.';
}

throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine);
}
}

Expand Down Expand Up @@ -508,7 +491,7 @@ private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $ob
return $this->refs[$value];
}

if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';

return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
Expand Down Expand Up @@ -554,7 +537,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)

// determine indentation if not specified
if (0 === $indentation) {
if (preg_match('/^ +/', $this->currentLine, $matches)) {
if (self::preg_match('/^ +/', $this->currentLine, $matches)) {
$indentation = strlen($matches[0]);
}
}
Expand All @@ -565,7 +548,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)
while (
$notEOF && (
$isCurrentLineBlank ||
preg_match($pattern, $this->currentLine, $matches)
self::preg_match($pattern, $this->currentLine, $matches)
)
) {
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
Expand Down Expand Up @@ -788,6 +771,49 @@ private function isStringUnIndentedCollectionItem()
*/
private function isBlockScalarHeader()
{
return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
}

/**
* A local wrapper for `preg_match` which will throw a ParseException if there
* is an internal error in the PCRE engine.
*
* This avoids us needing to check for "false" every time PCRE is used
* in the YAML engine
*
* @throws ParseException on a PCRE internal error
*
* @see preg_last_error()
*
* @internal
*/
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0)
{
$ret = preg_match($pattern, $subject, $matches, $flags, $offset);
if ($ret === false) {
switch (preg_last_error()) {
case PREG_INTERNAL_ERROR:
$error = 'Internal PCRE error.';
break;
case PREG_BACKTRACK_LIMIT_ERROR:
$error = 'pcre.backtrack_limit reached.';
break;
case PREG_RECURSION_LIMIT_ERROR:
$error = 'pcre.recursion_limit reached.';
break;
case PREG_BAD_UTF8_ERROR:
$error = 'Malformed UTF-8 data.';
break;
case PREG_BAD_UTF8_OFFSET_ERROR:
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
break;
default:
$error = 'Error.';
}

throw new ParseException($error);
}

return $ret;
}
}
12 changes: 12 additions & 0 deletions src/Symfony/Component/Yaml/Tests/ParserTest.php
Expand Up @@ -17,6 +17,7 @@

class ParserTest extends TestCase
{
/** @var Parser */
protected $parser;

protected function setUp()
Expand Down Expand Up @@ -1146,6 +1147,17 @@ public function parserThrowsExceptionWithCorrectLineNumberProvider()
),
);
}

public function testCanParseVeryLongValue()
{
$longStringWithSpaces = str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000);
$trickyVal = array('x' => $longStringWithSpaces);

$yamlString = Yaml::dump($trickyVal);
$arrayFromYaml = $this->parser->parse($yamlString);

$this->assertEquals($trickyVal, $arrayFromYaml);
}
}

class B
Expand Down

0 comments on commit ab1d938

Please sign in to comment.