Skip to content

Commit

Permalink
bug #13441 [VarDumper] fix handling of non-UTF8 strings (nicolas-grekas)
Browse files Browse the repository at this point in the history
This PR was merged into the 2.6 branch.

Discussion
----------

[VarDumper] fix handling of non-UTF8 strings

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #13440
| License       | MIT
| Doc PR        | -

Commits
-------

4205559 [VarDumper] fix handling of non-UTF8 strings
  • Loading branch information
fabpot committed Feb 2, 2015
2 parents c47bbbe + 4205559 commit 88899cb
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 49 deletions.
Expand Up @@ -14,6 +14,7 @@
<tag name="data_collector" id="dump" template="@Debug/Profiler/dump.html.twig" />
<argument type="service" id="debug.stopwatch" on-invalid="ignore" />
<argument>null</argument><!-- %templating.helper.code.file_link_format% -->
<argument>%kernel.charset%</argument>
</service>

<service id="debug.dump_listener" class="Symfony\Component\HttpKernel\EventListener\DumpListener">
Expand Down
Expand Up @@ -15,6 +15,7 @@
use Symfony\Component\HttpFoundation\Response;
use Symfony\Component\Stopwatch\Stopwatch;
use Symfony\Component\VarDumper\Cloner\Data;
use Symfony\Component\VarDumper\Cloner\VarCloner;
use Symfony\Component\VarDumper\Dumper\CliDumper;
use Symfony\Component\VarDumper\Dumper\HtmlDumper;
use Symfony\Component\VarDumper\Dumper\DataDumperInterface;
Expand All @@ -31,11 +32,13 @@ class DumpDataCollector extends DataCollector implements DataDumperInterface
private $clonesCount = 0;
private $clonesIndex = 0;
private $rootRefs;
private $charset;

public function __construct(Stopwatch $stopwatch = null, $fileLinkFormat = null)
public function __construct(Stopwatch $stopwatch = null, $fileLinkFormat = null, $charset = null)
{
$this->stopwatch = $stopwatch;
$this->fileLinkFormat = $fileLinkFormat ?: ini_get('xdebug.file_link_format') ?: get_cfg_var('xdebug.file_link_format');
$this->charset = $charset ?: ini_get('php.output_encoding') ?: ini_get('default_charset') ?: 'UTF-8';

// All clones share these properties by reference:
$this->rootRefs = array(
Expand Down Expand Up @@ -98,7 +101,7 @@ public function dump(Data $data)
$fileExcerpt = array();

for ($i = max($line - 3, 1), $max = min($line + 3, count($src)); $i <= $max; ++$i) {
$fileExcerpt[] = '<li'.($i === $line ? ' class="selected"' : '').'><code>'.htmlspecialchars($src[$i - 1]).'</code></li>';
$fileExcerpt[] = '<li'.($i === $line ? ' class="selected"' : '').'><code>'.$this->htmlEncode($src[$i - 1]).'</code></li>';
}

$fileExcerpt = '<ol start="'.max($line - 3, 1).'">'.implode("\n", $fileExcerpt).'</ol>';
Expand Down Expand Up @@ -158,7 +161,7 @@ public function getDumps($format, $maxDepthLimit = -1, $maxItemsPerDepth = -1)
$data = fopen('php://memory', 'r+b');

if ('html' === $format) {
$dumper = new HtmlDumper($data);
$dumper = new HtmlDumper($data, $this->charset);
} else {
throw new \InvalidArgumentException(sprintf('Invalid dump format: %s', $format));
}
Expand Down Expand Up @@ -195,19 +198,18 @@ public function __destruct()
}

if ('cli' !== PHP_SAPI && stripos($h[$i], 'html')) {
echo '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
$dumper = new HtmlDumper('php://output');
$dumper = new HtmlDumper('php://output', $this->charset);
} else {
$dumper = new CliDumper('php://output');
$dumper = new CliDumper('php://output', $this->charset);
$dumper->setColors(false);
}

foreach ($this->data as $i => $dump) {
$this->data[$i] = null;

if ($dumper instanceof HtmlDumper) {
$dump['name'] = htmlspecialchars($dump['name'], ENT_QUOTES, 'UTF-8');
$dump['file'] = htmlspecialchars($dump['file'], ENT_QUOTES, 'UTF-8');
$dump['name'] = $this->htmlEncode($dump['name']);
$dump['file'] = $this->htmlEncode($dump['file']);
if ('' !== $dump['file']) {
if ($this->fileLinkFormat) {
$link = strtr($this->fileLinkFormat, array('%f' => $dump['file'], '%l' => $dump['line']));
Expand All @@ -227,4 +229,18 @@ public function __destruct()
$this->dataCount = 0;
}
}

/**
 * Encodes a string for inclusion in HTML by dumping it through an HtmlDumper
 * that is configured with this collector's charset.
 *
 * @param string $s The raw string to encode.
 *
 * @return string The dumped text, without markup and without the surrounding quotes.
 */
private function htmlEncode($s)
{
    $html = '';

    // Collect the dumped lines in memory instead of writing them to a stream.
    $dumper = new HtmlDumper(function ($line) use (&$html) {$html .= $line;}, $this->charset);
    // Emit neither a header nor boundaries: only the dumped value is wanted.
    $dumper->setDumpHeader('');
    $dumper->setDumpBoundaries('', '');

    $cloner = new VarCloner();
    $dumper->dump($cloner->cloneVar($s));

    $text = strip_tags($html);

    // Strings are dumped as "..." but binary/non-UTF-8 ones as b"...": skip the
    // "b" marker as well, otherwise a stray opening quote is left in the result.
    $start = 0 === strpos($text, 'b"') ? 2 : 1;

    return substr($text, $start, -1);
}
}
31 changes: 1 addition & 30 deletions src/Symfony/Component/VarDumper/Cloner/Data.php
Expand Up @@ -176,7 +176,7 @@ private function dumpChildren($dumper, $parentCursor, &$refs, $children, $hashCu
$cursor->hashCut = $hashCut;
foreach ($children as $key => $child) {
$cursor->hashKeyIsBinary = isset($key[0]) && !preg_match('//u', $key);
$cursor->hashKey = $cursor->hashKeyIsBinary ? self::utf8Encode($key) : $key;
$cursor->hashKey = $key;
$this->dumpItem($dumper, $cursor, $refs, $child);
if (++$cursor->hashIndex === $this->maxItemsPerDepth || $cursor->stop) {
$parentCursor->stop = true;
Expand All @@ -191,33 +191,4 @@ private function dumpChildren($dumper, $parentCursor, &$refs, $children, $hashCu

return $hashCut;
}

/**
 * Portable replacement for utf8_encode().
 *
 * Uses mb_convert_encoding() with CP1252 as the source encoding when the
 * function exists; otherwise every byte is mapped by hand to a one or two
 * byte UTF-8 sequence.
 *
 * @param string $s The string to convert.
 *
 * @return string The converted string.
 *
 * @internal
 */
public static function utf8Encode($s)
{
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($s, 'UTF-8', 'CP1252');
    }

    // The string is doubled so that the second half can be read while the
    // result is written over the first half: the output is at most twice as
    // long as the input, so the write position never overtakes the read one.
    $buffer = $s.$s;
    $total = strlen($buffer);
    $write = 0;

    for ($read = $total >> 1; $read < $total; ++$read) {
        $char = $buffer[$read];

        if ($char < "\x80") {
            // Single byte, copied as is.
            $buffer[$write++] = $char;
        } elseif ($char < "\xC0") {
            // 0x80-0xBF: lead byte 0xC2 followed by the byte itself.
            $buffer[$write++] = "\xC2";
            $buffer[$write++] = $char;
        } else {
            // 0xC0-0xFF: lead byte 0xC3 followed by the byte shifted down by 64.
            $buffer[$write++] = "\xC3";
            $buffer[$write++] = chr(ord($char) - 64);
        }
    }

    return substr($buffer, 0, $write);
}
}
5 changes: 2 additions & 3 deletions src/Symfony/Component/VarDumper/Cloner/VarCloner.php
Expand Up @@ -97,11 +97,10 @@ protected function doClone($var)
$stub->class = Stub::STRING_BINARY;
if (0 <= $maxString && 0 < $cut = strlen($v) - $maxString) {
$stub->cut = $cut;
$cut = substr_replace($v, '', -$cut);
$stub->value = substr($v, 0, -$cut);
} else {
$cut = $v;
$stub->value = $v;
}
$stub->value = Data::utf8Encode($cut);
} elseif (0 <= $maxString && isset($v[1 + ($maxString >> 2)]) && 0 < $cut = iconv_strlen($v, 'UTF-8') - $maxString) {
$stub = new Stub();
$stub->type = Stub::TYPE_STRING;
Expand Down
92 changes: 90 additions & 2 deletions src/Symfony/Component/VarDumper/Dumper/AbstractDumper.php
Expand Up @@ -29,11 +29,16 @@ abstract class AbstractDumper implements DataDumperInterface, DumperInterface
protected $decimalPoint; // This is locale dependent
protected $indentPad = ' ';

private $charset;
private $charsetConverter;

/**
* @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput.
* @param callable|resource|string|null $output A line dumper callable, an opened stream or an output path, defaults to static::$defaultOutput.
* @param string $charset The default character encoding to use for non-UTF8 strings.
*/
public function __construct($output = null)
public function __construct($output = null, $charset = null)
{
$this->setCharset($charset ?: ini_get('php.output_encoding') ?: ini_get('default_charset') ?: 'UTF-8');
$this->decimalPoint = (string) 0.5;
$this->decimalPoint = $this->decimalPoint[1];
$this->setOutput($output ?: static::$defaultOutput);
Expand Down Expand Up @@ -67,6 +72,43 @@ public function setOutput($output)
return $prev;
}

/**
 * Sets the default character encoding to use for non-UTF8 strings.
 *
 * A null charset, "UTF-8" or "UTF8" is replaced by CP1252. The charset is
 * then probed against mbstring first and iconv second; when neither accepts
 * it, the converter stays on "fallback" and the charset becomes ISO-8859-1.
 *
 * @param string|null $charset The default character encoding to use for non-UTF8 strings.
 *
 * @return string|null The previous charset, null when none had been set yet.
 */
public function setCharset($charset)
{
    $prev = $this->charset;
    $this->charsetConverter = 'fallback';

    // The null check has to happen before upper-casing: strtoupper(null)
    // yields '', which would never match null and would then be probed as an
    // encoding name instead of selecting the CP1252 default.
    if (null !== $charset) {
        $charset = strtoupper($charset);
    }
    $charset = null === $charset || 'UTF-8' === $charset || 'UTF8' === $charset ? 'CP1252' : $charset;

    // Warnings raised while probing are swallowed; for iconv they flag the
    // charset as unsupported.
    $supported = true;
    set_error_handler(function () use (&$supported) {$supported = false;});

    if (function_exists('mb_encoding_aliases') && mb_encoding_aliases($charset)) {
        $this->charset = $charset;
        $this->charsetConverter = 'mbstring';
    } elseif (function_exists('iconv')) {
        // Reset the flag: the mbstring probe above may already have tripped it.
        $supported = true;
        iconv($charset, 'UTF-8', '');
        if ($supported) {
            $this->charset = $charset;
            $this->charsetConverter = 'iconv';
        }
    }
    if ('fallback' === $this->charsetConverter) {
        $this->charset = 'ISO-8859-1';
    }
    restore_error_handler();

    return $prev;
}

/**
* Sets the indentation pad string.
*
Expand Down Expand Up @@ -131,4 +173,50 @@ protected function echoLine($line, $depth, $indentPad)
fwrite($this->outputStream, str_repeat($indentPad, $depth).$line."\n");
}
}

/**
 * Re-encodes a string that is not valid UTF-8 into UTF-8.
 *
 * The conversion goes through mbstring or iconv depending on the converter
 * currently selected; when neither applies, or when iconv reports an error,
 * each byte is mapped by hand to a one or two byte UTF-8 sequence.
 *
 * @param string $s The non-UTF-8 string to convert.
 *
 * @return string The string converted to UTF-8.
 */
protected function utf8Encode($s)
{
    if ('mbstring' === $this->charsetConverter) {
        // Bytes that are not valid in the configured charset are read as raw 8bit.
        $from = mb_check_encoding($s, $this->charset) ? $this->charset : '8bit';

        return mb_convert_encoding($s, 'UTF-8', $from);
    }

    if ('iconv' === $this->charsetConverter) {
        // Any error raised by iconv() marks the conversion as failed and
        // sends the string to the manual mapping below.
        $failed = false;
        set_error_handler(function () use (&$failed) {$failed = true;});
        $converted = iconv($this->charset, 'UTF-8', $s);
        restore_error_handler();

        if (!$failed) {
            return $converted;
        }
    }

    // The string is doubled so that the second half can be read while the
    // result is written over the first half: the output is at most twice as
    // long as the input, so the write position never overtakes the read one.
    $buffer = $s.$s;
    $total = strlen($buffer);
    $write = 0;

    for ($read = $total >> 1; $read < $total; ++$read) {
        $char = $buffer[$read];

        if ($char < "\x80") {
            // Single byte, copied as is.
            $buffer[$write++] = $char;
        } elseif ($char < "\xC0") {
            // 0x80-0xBF: lead byte 0xC2 followed by the byte itself.
            $buffer[$write++] = "\xC2";
            $buffer[$write++] = $char;
        } else {
            // 0xC0-0xFF: lead byte 0xC3 followed by the byte shifted down by 64.
            $buffer[$write++] = "\xC3";
            $buffer[$write++] = chr(ord($char) - 64);
        }
    }

    return substr($buffer, 0, $write);
}
}
18 changes: 13 additions & 5 deletions src/Symfony/Component/VarDumper/Dumper/CliDumper.php
Expand Up @@ -11,7 +11,6 @@

namespace Symfony\Component\VarDumper\Dumper;

use Symfony\Component\VarDumper\Cloner\Data;
use Symfony\Component\VarDumper\Cloner\Cursor;

/**
Expand Down Expand Up @@ -48,9 +47,9 @@ class CliDumper extends AbstractDumper
/**
* {@inheritdoc}
*/
public function __construct($output = null)
public function __construct($output = null, $charset = null)
{
parent::__construct($output);
parent::__construct($output, $charset);

if ('\\' === DIRECTORY_SEPARATOR && false !== @getenv('ANSICON')) {
// Use only the base 16 xterm colors when using ANSICON
Expand Down Expand Up @@ -140,8 +139,8 @@ public function dumpScalar(Cursor $cursor, $type, $value)
break;

default:
$attr['value'] = isset($value[0]) && !preg_match('//u', $value) ? Data::utf8Encode($value) : $value;
$value = isset($type[0]) && !preg_match('//u', $type) ? Data::utf8Encode($type) : $type;
$attr['value'] = isset($value[0]) && !preg_match('//u', $value) ? $this->utf8Encode($value) : $value;
$value = isset($type[0]) && !preg_match('//u', $type) ? $this->utf8Encode($type) : $type;
break;
}

Expand All @@ -157,6 +156,9 @@ public function dumpString(Cursor $cursor, $str, $bin, $cut)
{
$this->dumpKey($cursor);

if ($bin) {
$str = $this->utf8Encode($str);
}
if ('' === $str) {
$this->line .= '""';
$this->dumpLine($cursor->depth);
Expand Down Expand Up @@ -220,6 +222,9 @@ public function enterHash(Cursor $cursor, $type, $class, $hasChild)
{
$this->dumpKey($cursor);

if (!preg_match('//u', $class)) {
$class = $this->utf8Encode($class);
}
if (Cursor::HASH_OBJECT === $type) {
$prefix = 'stdClass' !== $class ? $this->style('note', $class).' {' : '{';
} elseif (Cursor::HASH_RESOURCE === $type) {
Expand Down Expand Up @@ -279,6 +284,9 @@ protected function dumpEllipsis(Cursor $cursor, $hasChild, $cut)
protected function dumpKey(Cursor $cursor)
{
if (null !== $key = $cursor->hashKey) {
if ($cursor->hashKeyIsBinary) {
$key = $this->utf8Encode($key);
}
$attr = array('binary' => $cursor->hashKeyIsBinary);
$bin = $cursor->hashKeyIsBinary ? 'b' : '';
$style = 'key';
Expand Down
31 changes: 30 additions & 1 deletion src/Symfony/Component/VarDumper/Tests/HtmlDumperTest.php
Expand Up @@ -24,7 +24,6 @@ public function testGet()
require __DIR__.'/Fixtures/dumb-var.php';

$dumper = new HtmlDumper('php://output');
$dumper->setColors(false);
$dumper->setDumpHeader('<foo></foo>');
$dumper->setDumpBoundaries('<bar>', '</bar>');
$cloner = new VarCloner();
Expand Down Expand Up @@ -108,6 +107,36 @@ public function testGet()
</samp>]
</bar>
EOTXT
,

$out
);
}

public function testCharset()
{
if (!extension_loaded('mbstring')) {
$this->markTestSkipped('This test requires mbstring.');
}
$var = mb_convert_encoding('Словарь', 'CP1251', 'UTF-8');

$dumper = new HtmlDumper('php://output', 'CP1251');
$dumper->setDumpHeader('<foo></foo>');
$dumper->setDumpBoundaries('<bar>', '</bar>');
$cloner = new VarCloner();

$data = $cloner->cloneVar($var);
$out = fopen('php://memory', 'r+b');
$dumper->dump($data, $out);
rewind($out);
$out = stream_get_contents($out);

$this->assertStringMatchesFormat(
<<<EOTXT
<foo></foo><bar>b"<span class=sf-dump-str title="7 binary or non-UTF-8 characters">&#1057;&#1083;&#1086;&#1074;&#1072;&#1088;&#1100;</span>"
</bar>
EOTXT
,

Expand Down

0 comments on commit 88899cb

Please sign in to comment.