From 4205559cdc0040743043f4331530072d4c6e54e6 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Sun, 18 Jan 2015 19:50:58 +0100 Subject: [PATCH] [VarDumper] fix handling of non-UTF8 strings --- .../DebugBundle/Resources/config/services.xml | 1 + .../DataCollector/DumpDataCollector.php | 32 +++++-- .../Component/VarDumper/Cloner/Data.php | 31 +------ .../Component/VarDumper/Cloner/VarCloner.php | 5 +- .../VarDumper/Dumper/AbstractDumper.php | 92 ++++++++++++++++++- .../Component/VarDumper/Dumper/CliDumper.php | 18 +++- .../VarDumper/Tests/HtmlDumperTest.php | 31 ++++++- 7 files changed, 161 insertions(+), 49 deletions(-) diff --git a/src/Symfony/Bundle/DebugBundle/Resources/config/services.xml b/src/Symfony/Bundle/DebugBundle/Resources/config/services.xml index bfe98f317bca..915d510fdcc1 100644 --- a/src/Symfony/Bundle/DebugBundle/Resources/config/services.xml +++ b/src/Symfony/Bundle/DebugBundle/Resources/config/services.xml @@ -14,6 +14,7 @@ null + %kernel.charset% diff --git a/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php b/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php index 6099097f366b..05de7215e18c 100644 --- a/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php +++ b/src/Symfony/Component/HttpKernel/DataCollector/DumpDataCollector.php @@ -15,6 +15,7 @@ use Symfony\Component\HttpFoundation\Response; use Symfony\Component\Stopwatch\Stopwatch; use Symfony\Component\VarDumper\Cloner\Data; +use Symfony\Component\VarDumper\Cloner\VarCloner; use Symfony\Component\VarDumper\Dumper\CliDumper; use Symfony\Component\VarDumper\Dumper\HtmlDumper; use Symfony\Component\VarDumper\Dumper\DataDumperInterface; @@ -31,11 +32,13 @@ class DumpDataCollector extends DataCollector implements DataDumperInterface private $clonesCount = 0; private $clonesIndex = 0; private $rootRefs; + private $charset; - public function __construct(Stopwatch $stopwatch = null, $fileLinkFormat = null) + public function __construct(Stopwatch $stopwatch = null, $fileLinkFormat = null, $charset = null) { $this->stopwatch = $stopwatch; $this->fileLinkFormat = $fileLinkFormat ?: ini_get('xdebug.file_link_format') ?: get_cfg_var('xdebug.file_link_format'); + $this->charset = $charset ?: ini_get('php.output_encoding') ?: ini_get('default_charset') ?: 'UTF-8'; // All clones share these properties by reference: $this->rootRefs = array( @@ -98,7 +101,7 @@ public function dump(Data $data) $fileExcerpt = array(); for ($i = max($line - 3, 1), $max = min($line + 3, count($src)); $i <= $max; ++$i) { - $fileExcerpt[] = ''.htmlspecialchars($src[$i - 1]).''; + $fileExcerpt[] = ''.$this->htmlEncode($src[$i - 1]).''; } $fileExcerpt = '
    '.implode("\n", $fileExcerpt).'
'; @@ -158,7 +161,7 @@ public function getDumps($format, $maxDepthLimit = -1, $maxItemsPerDepth = -1) $data = fopen('php://memory', 'r+b'); if ('html' === $format) { - $dumper = new HtmlDumper($data); + $dumper = new HtmlDumper($data, $this->charset); } else { throw new \InvalidArgumentException(sprintf('Invalid dump format: %s', $format)); } @@ -195,10 +198,9 @@ public function __destruct() } if ('cli' !== PHP_SAPI && stripos($h[$i], 'html')) { - echo ''; - $dumper = new HtmlDumper('php://output'); + $dumper = new HtmlDumper('php://output', $this->charset); } else { - $dumper = new CliDumper('php://output'); + $dumper = new CliDumper('php://output', $this->charset); $dumper->setColors(false); } @@ -206,8 +208,8 @@ public function __destruct() $this->data[$i] = null; if ($dumper instanceof HtmlDumper) { - $dump['name'] = htmlspecialchars($dump['name'], ENT_QUOTES, 'UTF-8'); - $dump['file'] = htmlspecialchars($dump['file'], ENT_QUOTES, 'UTF-8'); + $dump['name'] = $this->htmlEncode($dump['name']); + $dump['file'] = $this->htmlEncode($dump['file']); if ('' !== $dump['file']) { if ($this->fileLinkFormat) { $link = strtr($this->fileLinkFormat, array('%f' => $dump['file'], '%l' => $dump['line'])); @@ -227,4 +229,18 @@ public function __destruct() $this->dataCount = 0; } } + + private function htmlEncode($s) + { + $html = ''; + + $dumper = new HtmlDumper(function ($line) use (&$html) {$html .= $line;}, $this->charset); + $dumper->setDumpHeader(''); + $dumper->setDumpBoundaries('', ''); + + $cloner = new VarCloner(); + $dumper->dump($cloner->cloneVar($s)); + + return substr(strip_tags($html), 1, -1); + } } diff --git a/src/Symfony/Component/VarDumper/Cloner/Data.php b/src/Symfony/Component/VarDumper/Cloner/Data.php index 6ef69045e55c..bd2110feff08 100644 --- a/src/Symfony/Component/VarDumper/Cloner/Data.php +++ b/src/Symfony/Component/VarDumper/Cloner/Data.php @@ -176,7 +176,7 @@ private function dumpChildren($dumper, $parentCursor, &$refs, $children, $hashCu $cursor->hashCut = $hashCut; foreach ($children as $key => $child) { $cursor->hashKeyIsBinary = isset($key[0]) && !preg_match('//u', $key); - $cursor->hashKey = $cursor->hashKeyIsBinary ? self::utf8Encode($key) : $key; + $cursor->hashKey = $key; $this->dumpItem($dumper, $cursor, $refs, $child); if (++$cursor->hashIndex === $this->maxItemsPerDepth || $cursor->stop) { $parentCursor->stop = true; @@ -191,33 +191,4 @@ private function dumpChildren($dumper, $parentCursor, &$refs, $children, $hashCu return $hashCut; } - - /** - * Portable variant of utf8_encode() - * - * @param string $s - * - * @return string - * - * @internal - */ - public static function utf8Encode($s) - { - if (function_exists('mb_convert_encoding')) { - return mb_convert_encoding($s, 'UTF-8', 'CP1252'); - } - - $s .= $s; - $len = strlen($s); - - for ($i = $len >> 1, $j = 0; $i < $len; ++$i, ++$j) { - switch (true) { - case $s[$i] < "\x80": $s[$j] = $s[$i]; break; - case $s[$i] < "\xC0": $s[$j] = "\xC2"; $s[++$j] = $s[$i]; break; - default: $s[$j] = "\xC3"; $s[++$j] = chr(ord($s[$i]) - 64); break; - } - } - - return substr($s, 0, $j); - } } diff --git a/src/Symfony/Component/VarDumper/Cloner/VarCloner.php b/src/Symfony/Component/VarDumper/Cloner/VarCloner.php index 1b625d5fdae2..5434d7d18bf6 100644 --- a/src/Symfony/Component/VarDumper/Cloner/VarCloner.php +++ b/src/Symfony/Component/VarDumper/Cloner/VarCloner.php @@ -97,11 +97,10 @@ protected function doClone($var) $stub->class = Stub::STRING_BINARY; if (0 <= $maxString && 0 < $cut = strlen($v) - $maxString) { $stub->cut = $cut; - $cut = substr_replace($v, '', -$cut); + $stub->value = substr($v, 0, -$cut); } else { - $cut = $v; + $stub->value = $v; } - $stub->value = Data::utf8Encode($cut); } elseif (0 <= $maxString && isset($v[1 + ($maxString >> 2)]) && 0 < $cut = iconv_strlen($v, 'UTF-8') - $maxString) { $stub = new Stub(); $stub->type = Stub::TYPE_STRING; diff --git a/src/Symfony/Component/VarDumper/Dumper/AbstractDumper.php b/src/Symfony/Component/VarDumper/Dumper/AbstractDumper.php index bc793499acf4..cc4b2ef49897 100644 --- a/src/Symfony/Component/VarDumper/Dumper/AbstractDumper.php +++ b/src/Symfony/Component/VarDumper/Dumper/AbstractDumper.php @@ -29,11 +29,16 @@ abstract class AbstractDumper implements DataDumperInterface, DumperInterface protected $decimalPoint; // This is locale dependent protected $indentPad = ' '; + private $charset; + private $charsetConverter; + /** - * @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput. + * @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput. + * @param string $charset The default character encoding to use for non-UTF8 strings. */ - public function __construct($output = null) + public function __construct($output = null, $charset = null) { + $this->setCharset($charset ?: ini_get('php.output_encoding') ?: ini_get('default_charset') ?: 'UTF-8'); $this->decimalPoint = (string) 0.5; $this->decimalPoint = $this->decimalPoint[1]; $this->setOutput($output ?: static::$defaultOutput); @@ -67,6 +72,43 @@ public function setOutput($output) return $prev; } + /** + * Sets the default character encoding to use for non-UTF8 strings. + * + * @param string $charset The default character encoding to use for non-UTF8 strings. + * + * @return string The previous charset. + */ + public function setCharset($charset) + { + $prev = $this->charset; + $this->charsetConverter = 'fallback'; + + $charset = strtoupper($charset); + $charset = null === $charset || 'UTF-8' === $charset || 'UTF8' === $charset ? 'CP1252' : $charset; + + $supported = true; + set_error_handler(function () use (&$supported) {$supported = false;}); + + if (function_exists('mb_encoding_aliases') && mb_encoding_aliases($charset)) { + $this->charset = $charset; + $this->charsetConverter = 'mbstring'; + } elseif (function_exists('iconv')) { + $supported = true; + iconv($charset, 'UTF-8', ''); + if ($supported) { + $this->charset = $charset; + $this->charsetConverter = 'iconv'; + } + } + if ('fallback' === $this->charsetConverter) { + $this->charset = 'ISO-8859-1'; + } + restore_error_handler(); + + return $prev; + } + /** * Sets the indentation pad string. * @@ -131,4 +173,50 @@ protected function echoLine($line, $depth, $indentPad) fwrite($this->outputStream, str_repeat($indentPad, $depth).$line."\n"); } } + + /** + * Converts a non-UTF-8 string to UTF-8. + * + * @param string $s The non-UTF-8 string to convert. + * + * @return string The string converted to UTF-8. + */ + protected function utf8Encode($s) + { + if ('mbstring' === $this->charsetConverter) { + return mb_convert_encoding($s, 'UTF-8', mb_check_encoding($s, $this->charset) ? $this->charset : '8bit'); + } + if ('iconv' === $this->charsetConverter) { + $valid = true; + set_error_handler(function () use (&$valid) {$valid = false;}); + $c = iconv($this->charset, 'UTF-8', $s); + restore_error_handler(); + if ($valid) { + return $c; + } + } + + $s .= $s; + $len = strlen($s); + + for ($i = $len >> 1, $j = 0; $i < $len; ++$i, ++$j) { + switch (true) { + case $s[$i] < "\x80": + $s[$j] = $s[$i]; + break; + + case $s[$i] < "\xC0": + $s[$j] = "\xC2"; + $s[++$j] = $s[$i]; + break; + + default: + $s[$j] = "\xC3"; + $s[++$j] = chr(ord($s[$i]) - 64); + break; + } + } + + return substr($s, 0, $j); + } } diff --git a/src/Symfony/Component/VarDumper/Dumper/CliDumper.php b/src/Symfony/Component/VarDumper/Dumper/CliDumper.php index 9b1be7c01b3c..ece7b38abdeb 100644 --- a/src/Symfony/Component/VarDumper/Dumper/CliDumper.php +++ b/src/Symfony/Component/VarDumper/Dumper/CliDumper.php @@ -11,7 +11,6 @@ namespace Symfony\Component\VarDumper\Dumper; -use Symfony\Component\VarDumper\Cloner\Data; use Symfony\Component\VarDumper\Cloner\Cursor; /** @@ -48,9 +47,9 @@ class CliDumper extends AbstractDumper /** * {@inheritdoc} */ - public function __construct($output = null) + public function __construct($output = null, $charset = null) { - parent::__construct($output); + parent::__construct($output, $charset); if ('\\' === DIRECTORY_SEPARATOR && false !== @getenv('ANSICON')) { // Use only the base 16 xterm colors when using ANSICON @@ -140,8 +139,8 @@ public function dumpScalar(Cursor $cursor, $type, $value) break; default: - $attr['value'] = isset($value[0]) && !preg_match('//u', $value) ? Data::utf8Encode($value) : $value; - $value = isset($type[0]) && !preg_match('//u', $type) ? Data::utf8Encode($type) : $type; + $attr['value'] = isset($value[0]) && !preg_match('//u', $value) ? $this->utf8Encode($value) : $value; + $value = isset($type[0]) && !preg_match('//u', $type) ? $this->utf8Encode($type) : $type; break; } @@ -157,6 +156,9 @@ public function dumpString(Cursor $cursor, $str, $bin, $cut) { $this->dumpKey($cursor); + if ($bin) { + $str = $this->utf8Encode($str); + } if ('' === $str) { $this->line .= '""'; $this->dumpLine($cursor->depth); @@ -220,6 +222,9 @@ public function enterHash(Cursor $cursor, $type, $class, $hasChild) { $this->dumpKey($cursor); + if (!preg_match('//u', $class)) { + $class = $this->utf8Encode($class); + } if (Cursor::HASH_OBJECT === $type) { $prefix = 'stdClass' !== $class ? $this->style('note', $class).' {' : '{'; } elseif (Cursor::HASH_RESOURCE === $type) { @@ -279,6 +284,9 @@ protected function dumpEllipsis(Cursor $cursor, $hasChild, $cut) protected function dumpKey(Cursor $cursor) { if (null !== $key = $cursor->hashKey) { + if ($cursor->hashKeyIsBinary) { + $key = $this->utf8Encode($key); + } $attr = array('binary' => $cursor->hashKeyIsBinary); $bin = $cursor->hashKeyIsBinary ? 'b' : ''; $style = 'key'; diff --git a/src/Symfony/Component/VarDumper/Tests/HtmlDumperTest.php b/src/Symfony/Component/VarDumper/Tests/HtmlDumperTest.php index fc8a15f09fa6..22083b516154 100644 --- a/src/Symfony/Component/VarDumper/Tests/HtmlDumperTest.php +++ b/src/Symfony/Component/VarDumper/Tests/HtmlDumperTest.php @@ -24,7 +24,6 @@ public function testGet() require __DIR__.'/Fixtures/dumb-var.php'; $dumper = new HtmlDumper('php://output'); - $dumper->setColors(false); $dumper->setDumpHeader(''); $dumper->setDumpBoundaries('', ''); $cloner = new VarCloner(); @@ -108,6 +107,36 @@ public function testGet() ] +EOTXT + , + + $out + ); + } + + public function testCharset() + { + if (!extension_loaded('mbstring')) { + $this->markTestSkipped('This test requires mbstring.'); + } + $var = mb_convert_encoding('Словарь', 'CP1251', 'UTF-8'); + + $dumper = new HtmlDumper('php://output', 'CP1251'); + $dumper->setDumpHeader(''); + $dumper->setDumpBoundaries('', ''); + $cloner = new VarCloner(); + + $data = $cloner->cloneVar($var); + $out = fopen('php://memory', 'r+b'); + $dumper->dump($data, $out); + rewind($out); + $out = stream_get_contents($out); + + $this->assertStringMatchesFormat( + <<b"Словарь" + + EOTXT ,