Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature #35156 [String] Made AbstractString::width() follow POSIX.1-2…
…001 (fancyweb) This PR was merged into the 5.1-dev branch. Discussion ---------- [String] Made AbstractString::width() follow POSIX.1-2001 | Q | A | ------------- | --- | Branch? | master | Bug fix? | no | New feature? | yes | Deprecations? | no | Tickets | - | License | MIT | Doc PR | - This PR ports the wcswidth() function (see http://man7.org/linux/man-pages/man3/wcwidth.3.html and https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c) into the String component. This new method will be useful in the Console component to determine how many columns a character takes. I kind of copied the Intl data import strategy. Commits ------- 347d825 [String] Made AbstractString::width() follow POSIX.1-2001
- Loading branch information
Showing
12 changed files
with
2,704 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
/Resources/bin/update-data.php export-ignore | ||
/Resources/WcswidthDataGenerator.php export-ignore | ||
/Tests export-ignore | ||
/phpunit.xml.dist export-ignore | ||
/.gitignore export-ignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
113 changes: 113 additions & 0 deletions
113
src/Symfony/Component/String/Resources/WcswidthDataGenerator.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
<?php | ||
|
||
/* | ||
* This file is part of the Symfony package. | ||
* | ||
* (c) Fabien Potencier <fabien@symfony.com> | ||
* | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
*/ | ||
|
||
namespace Symfony\Component\String\Resources; | ||
|
||
use Symfony\Component\HttpClient\HttpClient; | ||
use Symfony\Component\String\Exception\RuntimeException; | ||
use Symfony\Component\VarExporter\VarExporter; | ||
|
||
/**
 * Downloads Unicode Character Database files and turns them into the PHP
 * lookup tables written as "wcswidth_table_wide.php" and
 * "wcswidth_table_zero.php" in the configured output directory.
 *
 * Each generated file returns a list of [first, last] code point pairs
 * (inclusive, as integers) sorted by their first code point.
 *
 * @internal
 */
final class WcswidthDataGenerator
{
    /**
     * Matches "XXXX;W" / "XXXX..YYYY;F" lines of EastAsianWidth.txt.
     *
     * Spaces around the semicolon are optional so that both the compact
     * ("1100..115F;W") and the padded ("1100..115F     ; W") layouts match.
     */
    private const WIDE_PATTERN = '/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m';

    /**
     * Matches lines of DerivedGeneralCategory.txt whose category is Me or Mn.
     */
    private const ZERO_PATTERN = '/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; (?:Me|Mn)/m';

    private $outDir;

    private $client;

    /**
     * @param string $outDir Directory the generated table files are written to
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Generates both tables: wide characters first, then zero-width ones.
     *
     * @throws RuntimeException When a source file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Builds the table of code points whose East Asian Width is W or F.
     */
    private function writeWideWidthData(): void
    {
        $this->generateTable(
            'EastAsianWidth.txt',
            '/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/',
            self::WIDE_PATTERN,
            'The wide width pattern did not match anything.',
            'wcswidth_table_wide.php'
        );
    }

    /**
     * Builds the table of code points whose general category is Me or Mn.
     */
    private function writeZeroWidthData(): void
    {
        $this->generateTable(
            'extracted/DerivedGeneralCategory.txt',
            '/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/',
            self::ZERO_PATTERN,
            'The zero width pattern did not match anything.',
            'wcswidth_table_zero.php'
        );
    }

    /**
     * Fetches one data file, extracts its version and ranges, and writes the table.
     *
     * @param string $url            Path of the data file, relative to the client base URI
     * @param string $versionPattern Regex capturing the Unicode version from the file's first line
     * @param string $rangePattern   Regex capturing the first (and optional last) code point of each range
     * @param string $noMatchMessage Exception message used when no range is found
     * @param string $fileName       Name of the table file to write
     *
     * @throws RuntimeException When the version or the ranges cannot be extracted
     */
    private function generateTable(string $url, string $versionPattern, string $rangePattern, string $noMatchMessage, string $fileName): void
    {
        $content = $this->client->request('GET', $url)->getContent();

        if (!preg_match($versionPattern, $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all($rangePattern, $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException($noMatchMessage);
        }

        $this->write($fileName, $version, $matches);
    }

    /**
     * Writes a table file made of the generated header followed by the exported data.
     *
     * @param array $rawData Match sets as produced by preg_match_all() with PREG_SET_ORDER
     *
     * @throws RuntimeException When the file cannot be written
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        // The header ends right after its closing "*/"; separate it from the
        // return statement so the generated file does not read "*/return [".
        $content = $this->getHeader($version)."\n\nreturn ".VarExporter::export($this->format($rawData)).";\n";

        // Compare against false explicitly: file_put_contents() returns the
        // number of bytes written on success.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Returns the opening PHP tag and the comment placed at the top of a
     * generated file, stating the Unicode version and the generation date.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        // The closing marker stays at column 0 so the heredoc is also valid
        // on PHP versions that do not support indented closing markers.
        return <<<EOT
<?php

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */
EOT;
    }

    /**
     * Converts regex match sets into sorted [first, last] integer pairs.
     *
     * @param array $rawData Match sets: index 1 is the first code point in hexadecimal,
     *                       index 2 (absent for single code points) is the last one
     *
     * @return array List of [first, last] pairs ordered by their first code point
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // A line describing a single code point has no second capture group.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
55 changes: 55 additions & 0 deletions
55
src/Symfony/Component/String/Resources/bin/update-data.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
<?php | ||
|
||
/* | ||
* This file is part of the Symfony package. | ||
* | ||
* (c) Fabien Potencier <fabien@symfony.com> | ||
* | ||
* For the full copyright and license information, please view the LICENSE | ||
* file that was distributed with this source code. | ||
*/ | ||
|
||
use Symfony\Component\String\Resources\WcswidthDataGenerator; | ||
|
||
// Report every error level while the tables are being generated.
error_reporting(E_ALL);

// Turn any PHP error into an ErrorException so that it aborts the script.
set_error_handler(static function (int $severity, string $message, string $file, int $line): void {
    throw new \ErrorException($message, 0, $severity, $file, $line);
});

// Print an uncaught exception followed by its whole chain of previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    for ($current = $exception, $depth = 0; null !== $current; $current = $current->getPrevious(), ++$depth) {
        // Every exception after the first one is the cause of the one printed before it.
        if ($depth > 0) {
            echo "Caused by\n";
        }

        echo get_class($current).': '.$current->getMessage()."\n\n";
        echo $current->getFile().':'.$current->getLine()."\n";
        echo $current->getTraceAsString()."\n";
    }
});

// The autoloader is looked up two directories above this script.
$autoloadPath = __DIR__.'/../../vendor/autoload.php';

if (!file_exists($autoloadPath)) {
    echo wordwrap('You should run "composer install" in the component before running this script.', 75);
    echo " Aborting.\n";

    exit(1);
}

require_once $autoloadPath;

echo "Generating wcswidth tables data...\n";

// The tables are written to the "data" directory next to this script's parent directory.
$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";
Oops, something went wrong.