Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). This is a

### Fixed

- Slightly better support for escaped characters in Xlsx Reader/Writer. [Discussion #4724](https://github.com/PHPOffice/PhpSpreadsheet/discussions/4724) [PR #4726](https://github.com/PHPOffice/PhpSpreadsheet/pull/4726)
- CODE/UNICODE and CHAR/UNICHAR. [PR #4727](https://github.com/PHPOffice/PhpSpreadsheet/pull/4727)

## 2025-11-24 - 5.3.0
Expand Down
124 changes: 91 additions & 33 deletions src/PhpSpreadsheet/Shared/StringHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -237,57 +237,73 @@ class StringHelper
/**
* Decimal separator.
*/
private static ?string $decimalSeparator = null;
protected static ?string $decimalSeparator = null;

/**
* Thousands separator.
*/
private static ?string $thousandsSeparator = null;
protected static ?string $thousandsSeparator = null;

/**
* Currency code.
*/
private static ?string $currencyCode = null;
protected static ?string $currencyCode = null;

/**
* Is iconv extension available?
*/
private static ?bool $isIconvEnabled = null;
protected static ?bool $isIconvEnabled = null;

/**
* iconv options.
*/
private static string $iconvOptions = '//IGNORE//TRANSLIT';
protected static string $iconvOptions = '//IGNORE//TRANSLIT';

/** @var string[] */
protected static array $iconvOptionsArray = ['//IGNORE//TRANSLIT', '//IGNORE'];

protected static string $iconvName = 'iconv';

protected static bool $iconvTest2 = false;

protected static bool $iconvTest3 = false;

/**
* Get whether iconv extension is available.
*/
public static function getIsIconvEnabled(): bool
{
if (isset(self::$isIconvEnabled)) {
return self::$isIconvEnabled;
if (isset(static::$isIconvEnabled)) {
return static::$isIconvEnabled;
}

// Assume no problems with iconv
self::$isIconvEnabled = true;
static::$isIconvEnabled = true;

// Fail if iconv doesn't exist
if (!function_exists('iconv')) {
self::$isIconvEnabled = false;
} elseif (!@iconv('UTF-8', 'UTF-16LE', 'x')) {
if (!function_exists(static::$iconvName)) {
static::$isIconvEnabled = false;
} elseif (static::$iconvTest2 || !@iconv('UTF-8', 'UTF-16LE', 'x')) {
// Sometimes iconv is not working, and e.g. iconv('UTF-8', 'UTF-16LE', 'x') just returns false,
self::$isIconvEnabled = false;
} elseif (defined('PHP_OS') && @stristr(PHP_OS, 'AIX') && defined('ICONV_IMPL') && (@strcasecmp(ICONV_IMPL, 'unknown') == 0) && defined('ICONV_VERSION') && (@strcasecmp(ICONV_VERSION, 'unknown') == 0)) {
static::$isIconvEnabled = false;
} elseif (static::$iconvTest3 || (defined('PHP_OS') && @stristr(PHP_OS, 'AIX') && defined('ICONV_IMPL') && (@strcasecmp(ICONV_IMPL, 'unknown') == 0) && defined('ICONV_VERSION') && (@strcasecmp(ICONV_VERSION, 'unknown') == 0))) {
// CUSTOM: IBM AIX iconv() does not work
self::$isIconvEnabled = false;
static::$isIconvEnabled = false;
}

// Deactivate iconv default options if they fail (as seen on IMB i)
if (self::$isIconvEnabled && !@iconv('UTF-8', 'UTF-16LE' . self::$iconvOptions, 'x')) {
self::$iconvOptions = '';
// Deactivate iconv default options if they fail (as seen on IBM i-series)
if (static::$isIconvEnabled) {
static::$iconvOptions = '';
foreach (static::$iconvOptionsArray as $option) {
if (@iconv('UTF-8', 'UTF-16LE' . $option, 'x') !== false) {
static::$iconvOptions = $option;

break;
}
}
}

return self::$isIconvEnabled;
return static::$isIconvEnabled;
}

/**
Expand All @@ -305,7 +321,47 @@ public static function getIsIconvEnabled(): bool
*/
public static function controlCharacterOOXML2PHP(string $textValue): string
{
return str_replace(self::CONTROL_CHARACTERS_VALUES, self::CONTROL_CHARACTERS_KEYS, $textValue);
return Preg::replaceCallback('/_x[0-9A-F]{4}_(_xD[CDEF][0-9A-F]{2}_)?/', self::toOutChar(...), $textValue);
}

/**
 * Convert a single hexadecimal digit character to its numeric value.
 *
 * Characters '0' through '9' yield 0-9. Every other character is measured
 * relative to 'A' (so 'A' yields 10 and 'F' yields 15); no validation is
 * performed on the input.
 */
private static function toHexVal(string $char): int
{
    $isDecimalDigit = $char >= '0' && $char <= '9';
    // Offset to subtract from the character code: '0' for digits, 'A' - 10 for letters.
    $base = $isDecimalDigit ? ord('0') : ord('A') - 10;

    return ord($char) - $base;
}

/**
 * Convert one matched OOXML escape sequence to its UTF-8 text.
 *
 * The whole match ($match[0]) is either a single escape `_xHHHH_`
 * (7 characters) or such an escape immediately followed by a second
 * escape whose code unit is in the low-surrogate range `_xDC00_`..`_xDFFF_`
 * (14 characters). Unpaired or misplaced surrogates are rendered as the
 * substitute character rather than being passed to mb_chr().
 *
 * @param array<?string> $match
 */
private static function toOutChar(array $match): string
{
    /** @var string */
    $chars = $match[0];
    // First code unit: the four hex digits at offsets 2..5.
    $h = ((self::toHexVal($chars[2]) << 12)
        | (self::toHexVal($chars[3]) << 8)
        | (self::toHexVal($chars[4]) << 4)
        | (self::toHexVal($chars[5])));
    if (strlen($chars) === 7) { // no low surrogate
        // Any surrogate (D800-DFFF) on its own cannot be represented.
        if ($chars[2] === 'D' && in_array($chars[3], ['8', '9', 'A', 'B', 'C', 'D', 'E', 'F'], true)) {
            return '�';
        }

        return mb_chr($h, 'UTF-8');
    }
    // Two low surrogates in a row (first code unit in DC00-DFFF).
    if ($chars[2] === 'D' && in_array($chars[3], ['C', 'D', 'E', 'F'], true)) {
        return '�'; // Excel interprets as one substitute, not 2
    }
    // Ordinary character followed by an unpaired low surrogate.
    if ($chars[2] !== 'D' || !in_array($chars[3], ['8', '9', 'A', 'B'], true)) {
        return mb_chr($h, 'UTF-8') . '�';
    }
    // Valid pair: second code unit is the four hex digits at offsets 9..12.
    $l = ((self::toHexVal($chars[9]) << 12)
        | (self::toHexVal($chars[10]) << 8)
        | (self::toHexVal($chars[11]) << 4)
        | (self::toHexVal($chars[12])));
    // Combine high and low surrogate into a supplementary-plane code point.
    $result = 0x10000 + ($h - 0xD800) * 0x400 + ($l - 0xDC00);

    return mb_chr($result, 'UTF-8');
}

/**
Expand All @@ -323,6 +379,8 @@ public static function controlCharacterOOXML2PHP(string $textValue): string
*/
public static function controlCharacterPHP2OOXML(string $textValue): string
{
    // A literal "_xHHHH_" in the text would be read back as an escape, so the
    // underscore that starts it is itself escaped as "_x005F_". A lookahead is
    // used so the closing underscore is not consumed: it may also open the next
    // sequence (e.g. "_x0030_x0031_"), and that one must be escaped too.
    $textValue = Preg::replace('/_(?=x[0-9A-F]{4}_)/', '_x005F_', $textValue);

    // Then encode actual control characters using the class lookup tables.
    return str_replace(self::CONTROL_CHARACTERS_KEYS, self::CONTROL_CHARACTERS_VALUES, $textValue);
}

Expand Down Expand Up @@ -418,10 +476,10 @@ public static function UTF8toBIFF8UnicodeLong(string $textValue): string
* @param string $to Encoding to convert to, e.g. 'UTF-8'
* @param string $from Encoding to convert from, e.g. 'UTF-16LE'
*/
public static function convertEncoding(string $textValue, string $to, string $from): string
public static function convertEncoding(string $textValue, string $to, string $from, ?string $options = null): string
{
if (static::getIsIconvEnabled()) {
$result = iconv($from, $to . self::$iconvOptions, $textValue);
$result = iconv($from, $to . ($options ?? static::$iconvOptions), $textValue);
if (false !== $result) {
return $result;
}
Expand Down Expand Up @@ -561,11 +619,11 @@ private static function getLocaleValue(string $key, string $altKey, string $defa
*/
public static function getDecimalSeparator(): string
{
if (!isset(self::$decimalSeparator)) {
self::$decimalSeparator = self::getLocaleValue('decimal_point', 'mon_decimal_point', '.');
if (!isset(static::$decimalSeparator)) {
static::$decimalSeparator = self::getLocaleValue('decimal_point', 'mon_decimal_point', '.');
}

return self::$decimalSeparator;
return static::$decimalSeparator;
}

/**
Expand All @@ -576,7 +634,7 @@ public static function getDecimalSeparator(): string
*/
public static function setDecimalSeparator(?string $separator): void
{
self::$decimalSeparator = $separator;
static::$decimalSeparator = $separator;
}

/**
Expand All @@ -585,11 +643,11 @@ public static function setDecimalSeparator(?string $separator): void
*/
public static function getThousandsSeparator(): string
{
if (!isset(self::$thousandsSeparator)) {
self::$thousandsSeparator = self::getLocaleValue('thousands_sep', 'mon_thousands_sep', ',');
if (!isset(static::$thousandsSeparator)) {
static::$thousandsSeparator = self::getLocaleValue('thousands_sep', 'mon_thousands_sep', ',');
}

return self::$thousandsSeparator;
return static::$thousandsSeparator;
}

/**
Expand All @@ -600,7 +658,7 @@ public static function getThousandsSeparator(): string
*/
public static function setThousandsSeparator(?string $separator): void
{
self::$thousandsSeparator = $separator;
static::$thousandsSeparator = $separator;
}

/**
Expand All @@ -609,11 +667,11 @@ public static function setThousandsSeparator(?string $separator): void
*/
public static function getCurrencyCode(bool $trimAlt = false): string
{
if (!isset(self::$currencyCode)) {
self::$currencyCode = self::getLocaleValue('currency_symbol', 'int_curr_symbol', '$', $trimAlt);
if (!isset(static::$currencyCode)) {
static::$currencyCode = self::getLocaleValue('currency_symbol', 'int_curr_symbol', '$', $trimAlt);
}

return self::$currencyCode;
return static::$currencyCode;
}

/**
Expand All @@ -624,7 +682,7 @@ public static function getCurrencyCode(bool $trimAlt = false): string
*/
public static function setCurrencyCode(?string $currencyCode): void
{
self::$currencyCode = $currencyCode;
static::$currencyCode = $currencyCode;
}

/**
Expand Down
130 changes: 130 additions & 0 deletions tests/PhpSpreadsheetTests/Reader/Xlsx/UnderscoreTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
<?php

declare(strict_types=1);

namespace PhpOffice\PhpSpreadsheetTests\Reader\Xlsx;

use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Cell\DefaultValueBinder;
use PhpOffice\PhpSpreadsheet\Reader\Xlsx as XlsxReader;
use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheetTests\Functional\AbstractFunctional;
use PHPUnit\Framework\Attributes\DataProvider;

/**
 * Round-trip and reader tests for `_xHHHH_` escape handling in Xlsx files.
 */
class UnderscoreTest extends AbstractFunctional
{
    private const TEST_FILE = 'tests/data/Reader/XLSX/issue.4724.xlsx';

    private ?Spreadsheet $spreadsheet = null;

    private ?Spreadsheet $reloadedSpreadsheet = null;

    /**
     * Release any spreadsheets created by the test that just ran.
     */
    protected function tearDown(): void
    {
        $this->spreadsheet?->disconnectWorksheets();
        $this->spreadsheet = null;
        $this->reloadedSpreadsheet?->disconnectWorksheets();
        $this->reloadedSpreadsheet = null;
    }

    /**
     * A value written through the default binder must come back unchanged.
     */
    #[DataProvider('underscoreProvider')]
    public function testUnderscore(string $value): void
    {
        $this->spreadsheet = new Spreadsheet();
        $this->spreadsheet->getActiveSheet()->setCellValue('A1', $value);

        $this->reloadedSpreadsheet = $this->writeAndReload($this->spreadsheet, 'Xlsx');
        $reloadedValue = $this->reloadedSpreadsheet
            ->getActiveSheet()
            ->getCell('A1')
            ->getValue();

        self::assertSame($value, $reloadedValue);
    }

    /**
     * Same round trip, but with the value stored as an inline string.
     */
    #[DataProvider('underscoreProvider')]
    public function testUnderscoreInline(string $value): void
    {
        $this->spreadsheet = new Spreadsheet();
        $this->spreadsheet
            ->getActiveSheet()
            ->setCellValueExplicit('A1', $value, DataType::TYPE_INLINE);

        $this->reloadedSpreadsheet = $this->writeAndReload($this->spreadsheet, 'Xlsx');
        $reloadedValue = $this->reloadedSpreadsheet
            ->getActiveSheet()
            ->getCell('A1')
            ->getValueString();

        self::assertSame($value, $reloadedValue);
    }

    /**
     * Strings containing underscores, with and without an escape-like sequence.
     */
    public static function underscoreProvider(): array
    {
        return array_map(
            static fn (string $text): array => [$text],
            ['A_x0030_', 'A_x0030_B', 'A_B'],
        );
    }

    /**
     * Confirm the fixture's shared strings contain the raw escapes the other tests rely on.
     */
    public function testPreliminaries(): void
    {
        $path = 'zip://' . self::TEST_FILE . '#xl/sharedStrings.xml';
        $xml = file_get_contents($path);
        if ($xml === false) {
            self::fail('Unable to read file');
        }

        $expectedFragments = [
            'count="8"',
            "<t>line_x000D_\nwith_x000D_\nbreaks</t>",
            '<t>A_x005F_x0030_B</t>',
            '<t>ABC_x0031__x0032__x0033_DEF</t>',
            '<t>_xC1EF_</t>',
            '<t>_xD801__xDC05_</t>',
            '<t>_xD801__x0038_</t>',
            '<t>_x0039__xDC05_</t>',
            '<t>_xDF39__xDC05_</t>',
        ];
        foreach ($expectedFragments as $fragment) {
            self::assertStringContainsString($fragment, $xml);
        }
    }

    /**
     * Load the fixture with carriage returns preserved and check every decoded cell.
     */
    public function testX000dPreserved(): void
    {
        $binder = new DefaultValueBinder();
        $binder->setPreserveCr(true);
        $reader = new XlsxReader();
        $reader->setValueBinder($binder);

        $this->spreadsheet = $reader->load(self::TEST_FILE);
        $sheet = $this->spreadsheet->getActiveSheet();

        // [cell, expected value, assertion message]
        $expectations = [
            ['A1', "line\r\nwith\r\nbreaks", ''],
            ['A2', 'A_x0030_B', ''],
            ['A3', 'ABC123DEF', ''],
            ['A4', '쇯', ''],
            ['A5', '𐐅', 'outside BMP'],
            ['A6', '�8', 'high surrogate without low'],
            ['A7', '9�', 'low surrogate without high'],
            ['A8', '�', '2 low surrogates'],
        ];
        foreach ($expectations as [$coordinate, $expected, $message]) {
            self::assertSame($expected, $sheet->getCell($coordinate)->getValue(), $message);
        }
    }

    /**
     * Load the fixture with the default binder; carriage returns are dropped from line breaks.
     */
    public function testX000dNotPreserved(): void
    {
        $reader = new XlsxReader();
        $this->spreadsheet = $reader->load(self::TEST_FILE);
        $sheet = $this->spreadsheet->getActiveSheet();

        self::assertSame("line\nwith\nbreaks", $sheet->getCell('A1')->getValue());
        self::assertSame('A_x0030_B', $sheet->getCell('A2')->getValue());
    }
}
10 changes: 3 additions & 7 deletions tests/PhpSpreadsheetTests/Shared/StringHelperNoIconv.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@

class StringHelperNoIconv extends StringHelper
{
/**
* Simulate that iconv is not available.
*/
public static function getIsIconvEnabled(): bool
{
return false;
}
protected static ?bool $isIconvEnabled = null;

protected static string $iconvName = 'simulateIconvUnavilable';
}
Loading