diff --git a/CHANGELOG.md b/CHANGELOG.md index 5419b4c542..78d4299805 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). Thia is a ### Fixed -- Nothing yet. +- CODE/UNICODE and CHAR/UNICHAR. [PR #4727](https://github.com/PHPOffice/PhpSpreadsheet/pull/4727) ## 2025-11-24 - 5.3.0 diff --git a/docs/references/function-list-by-category.md b/docs/references/function-list-by-category.md index 7a8b8728d7..c7f8f6bc90 100644 --- a/docs/references/function-list-by-category.md +++ b/docs/references/function-list-by-category.md @@ -569,8 +569,8 @@ THAINUMSOUND | **Not yet Implemented** THAINUMSTRING | **Not yet Implemented** THAISTRINGLENGTH | **Not yet Implemented** TRIM | \PhpOffice\PhpSpreadsheet\Calculation\TextData\Trim::spaces -UNICHAR | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::character -UNICODE | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::code +UNICHAR | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::characterUnicode +UNICODE | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::codeUnicode UPPER | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CaseConvert::upper VALUE | \PhpOffice\PhpSpreadsheet\Calculation\TextData\Format::VALUE VALUETOTEXT | \PhpOffice\PhpSpreadsheet\Calculation\TextData\Format::valueToText diff --git a/docs/references/function-list-by-name-compact.md b/docs/references/function-list-by-name-compact.md index 6cb6508ee4..ed3ec37214 100644 --- a/docs/references/function-list-by-name-compact.md +++ b/docs/references/function-list-by-name-compact.md @@ -599,8 +599,8 @@ TYPE | INFORMATION | Information\Value::type Excel Function | Category | PhpSpreadsheet Function -------------------------|-----------------------|-------------------------------------- -UNICHAR | TEXT_AND_DATA | TextData\CharacterConvert::character -UNICODE | TEXT_AND_DATA | TextData\CharacterConvert::code +UNICHAR | TEXT_AND_DATA 
| TextData\CharacterConvert::characterUnicode +UNICODE | TEXT_AND_DATA | TextData\CharacterConvert::codeUnicode UNIQUE | LOOKUP_AND_REFERENCE | LookupRef\Unique::unique UPPER | TEXT_AND_DATA | TextData\CaseConvert::upper USDOLLAR | FINANCIAL | Financial\Dollar::format diff --git a/docs/references/function-list-by-name.md b/docs/references/function-list-by-name.md index 3acf558874..8924d2a74e 100644 --- a/docs/references/function-list-by-name.md +++ b/docs/references/function-list-by-name.md @@ -595,8 +595,8 @@ TYPE | CATEGORY_INFORMATION | \PhpOffice\PhpSpread Excel Function | Category | PhpSpreadsheet Function -------------------------|--------------------------------|-------------------------------------- -UNICHAR | CATEGORY_TEXT_AND_DATA | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::character -UNICODE | CATEGORY_TEXT_AND_DATA | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::code +UNICHAR | CATEGORY_TEXT_AND_DATA | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::characterUnicode +UNICODE | CATEGORY_TEXT_AND_DATA | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert::codeUnicode UNIQUE | CATEGORY_LOOKUP_AND_REFERENCE | \PhpOffice\PhpSpreadsheet\Calculation\LookupRef\Unique::unique UPPER | CATEGORY_TEXT_AND_DATA | \PhpOffice\PhpSpreadsheet\Calculation\TextData\CaseConvert::upper USDOLLAR | CATEGORY_FINANCIAL | \PhpOffice\PhpSpreadsheet\Calculation\Financial\Dollar::format diff --git a/src/PhpSpreadsheet/Calculation/FunctionArray.php b/src/PhpSpreadsheet/Calculation/FunctionArray.php index bc7f3a76d7..25e0e92312 100644 --- a/src/PhpSpreadsheet/Calculation/FunctionArray.php +++ b/src/PhpSpreadsheet/Calculation/FunctionArray.php @@ -2496,12 +2496,12 @@ class FunctionArray extends CalculationBase ], 'UNICHAR' => [ 'category' => Category::CATEGORY_TEXT_AND_DATA, - 'functionCall' => [TextData\CharacterConvert::class, 'character'], + 'functionCall' => [TextData\CharacterConvert::class, 
/**
 * Shared implementation behind CHAR (one-byte character set) and UNICHAR (Unicode).
 *
 * @param mixed $character Numeric code to convert; validated through Helpers::validateInt()
 * @param bool $ansi True to interpret the code in the configured one-byte character set (CHAR),
 *                   false to interpret it as a Unicode code point (UNICHAR)
 *
 * @return string The resulting character encoded as UTF-8, or an Excel error string
 *                (the validation message, #VALUE! when out of range, #N/A when the
 *                code point cannot be represented as a character)
 */
private static function characterBoth(mixed $character, bool $ansi = true): string
{
    try {
        $character = Helpers::validateInt($character, true);
    } catch (CalcExp $e) {
        return $e->getMessage();
    }

    // Code 219 is answered directly with the euro sign when the Mac character set is active.
    // NOTE(review): presumably iconv's MAC table maps this slot to a different symbol - confirm.
    if ($ansi && $character === 219 && self::$oneByteCharacterSet[0] === 'M') {
        return '€';
    }

    // OpenOffice compatibility mode accepts code 0; otherwise the lowest valid code is 1.
    $min = Functions::getCompatibilityMode() === Functions::COMPATIBILITY_OPENOFFICE ? 0 : 1;
    if ($character < $min || ($ansi && $character > 255) || $character > 0x10FFFF) {
        return ExcelError::VALUE();
    }

    if ($ansi) {
        // Transcode the single byte from the configured one-byte character set to UTF-8.
        // The cast turns an iconv() failure (false) into an empty string.
        return (string) iconv(self::$oneByteCharacterSet, 'UTF-8//IGNORE', chr($character));
    }

    // Surrogate code points (0xD800-0xDFFF) pass the range checks above but have no
    // UTF-8 form: mb_chr() returns false for them, which would violate the declared
    // string return type. Excel documents #N/A for partial surrogates.
    $isSurrogate = $character >= 0xD800 && $character <= 0xDFFF;
    if ($isSurrogate || $character > 0x10FFFD) { // 0x10FFFD is the last assigned code point
        return ExcelError::NA();
    }

    $result = mb_chr($character, 'UTF-8');

    // Defensive: never let a false from mb_chr() escape through the string return type.
    return ($result === false) ? ExcelError::NA() : $result;
}
'1' : '0'; + } + + return self::codeBoth(StringHelper::convertToString($characters, convertBool: true), false); + } + + private static function codeBoth(string $characters, bool $ansi = true): int|string + { try { $characters = Helpers::extractString($characters, true); } catch (CalcExp $e) { @@ -72,22 +122,27 @@ public static function code(mixed $characters): array|string|int if (mb_strlen($characters, 'UTF-8') > 1) { $character = mb_substr($characters, 0, 1, 'UTF-8'); } + if ($ansi && $character === '€' && self::$oneByteCharacterSet[0] === 'M') { + return 219; + } + + $result = mb_ord($character, 'UTF-8'); + if ($ansi) { + $result = iconv('UTF-8', self::$oneByteCharacterSet . '//IGNORE', $character); + + return ($result !== '') ? ord("$result") : 63; // question mark + } - return self::unicodeToOrd($character); + return $result; } - private static function unicodeToOrd(string $character): int + public static function setWindowsCharacterSet(): void { - $retVal = 0; - $iconv = iconv('UTF-8', 'UCS-4LE', $character); - if ($iconv !== false) { - /** @var false|int[] */ - $result = unpack('V', $iconv); - if (is_array($result) && isset($result[1])) { - $retVal = $result[1]; - } - } + self::$oneByteCharacterSet = 'Windows-1252'; + } - return $retVal; + public static function setMacCharacterSet(): void + { + self::$oneByteCharacterSet = 'MAC'; } } diff --git a/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/CharTest.php b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/CharTest.php index 7c9b806263..f092e44294 100644 --- a/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/CharTest.php +++ b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/CharTest.php @@ -5,13 +5,46 @@ namespace PhpOffice\PhpSpreadsheetTests\Calculation\Functions\TextData; use PhpOffice\PhpSpreadsheet\Calculation\Calculation; +use PhpOffice\PhpSpreadsheet\Calculation\TextData\CharacterConvert as CC; use PHPUnit\Framework\Attributes\DataProvider; class CharTest extends 
#[DataProvider('providerCHAR')]
public function testCHAR(mixed $expectedResult, mixed $character = 'omitted'): void
{
    // The provider is shared with UNICHAR and Mac CHAR: when the expectation is an
    // array, its first entry is the one that applies to plain (Windows) CHAR.
    $expected = is_array($expectedResult) ? $expectedResult[0] : $expectedResult;
    $this->mightHaveException($expected);

    $worksheet = $this->getSheet();

    // Without an argument the formula is called bare; otherwise the argument is
    // placed in A1 and referenced from the formula.
    $formula = '=CHAR()';
    if ($character !== 'omitted') {
        $this->setCell('A1', $character);
        $formula = '=CHAR(A1)';
    }
    $worksheet->getCell('B1')->setValue($formula);

    $actual = $worksheet->getCell('B1')->getCalculatedValue();
    self::assertEquals($expected, $actual);
}
#[DataProvider('providerCODE')]
public function testCODE(mixed $expectedResult, mixed $character = 'omitted'): void
{
    // The provider is shared with UNICODE and Mac CODE: when the expectation is an
    // array, its first entry is the one that applies to plain (Windows) CODE.
    $expected = is_array($expectedResult) ? $expectedResult[0] : $expectedResult;
    $this->mightHaveException($expected);

    $worksheet = $this->getSheet();

    // Without an argument the formula is called bare; otherwise the argument is
    // placed in A1 and referenced from the formula.
    $formula = '=CODE()';
    if ($character !== 'omitted') {
        $this->setCell('A1', $character);
        $formula = '=CODE(A1)';
    }
    $worksheet->getCell('B1')->setValue($formula);

    $actual = $worksheet->getCell('B1')->getCalculatedValue();
    self::assertEquals($expected, $actual);
}
$expectedResult[0]; + } $this->mightHaveException($expectedResult); $sheet = $this->getSheet(); if ($character === 'omitted') { diff --git a/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/OpenOfficeTest.php b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/OpenOfficeTest.php index 5a73568b6e..2bcad6ca7d 100644 --- a/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/OpenOfficeTest.php +++ b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/OpenOfficeTest.php @@ -14,7 +14,7 @@ public function testOpenOffice(mixed $expectedResult, string $formula): void $sheet = $this->getSheet(); $this->setCell('A1', $formula); $result = $sheet->getCell('A1')->getCalculatedValue(); - self::assertEquals($expectedResult, $result); + self::assertSame($expectedResult, $result); } public static function providerOpenOffice(): array diff --git a/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/UnicharTest.php b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/UnicharTest.php new file mode 100644 index 0000000000..c9d6d9b8b6 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/UnicharTest.php @@ -0,0 +1,56 @@ +mightHaveException($expectedResult); + $sheet = $this->getSheet(); + if ($character === 'omitted') { + $sheet->getCell('B1')->setValue('=UNICHAR()'); + } else { + $this->setCell('A1', $character); + $sheet->getCell('B1')->setValue('=UNICHAR(A1)'); + } + $result = $sheet->getCell('B1')->getCalculatedValue(); + self::assertEquals($expectedResult, $result); + } + + public static function providerCHAR(): array + { + return require 'tests/data/Calculation/TextData/CHAR.php'; + } + + /** @param mixed[] $expectedResult */ + #[DataProvider('providerCharArray')] + public function testCharArray(array $expectedResult, string $array): void + { + $calculation = Calculation::getInstance(); + + $formula = "=UNICHAR({$array})"; + $result = $calculation->calculateFormula($formula); + self::assertSame($expectedResult, $result); + } 
+ + public static function providerCharArray(): array + { + return [ + 'row vector' => [[['P', 'H', 'P']], '{80, 72, 80}'], + 'column vector' => [[['P'], ['H'], ['P']], '{80; 72; 80}'], + 'matrix' => [[['Y', 'o'], ['l', 'o']], '{89, 111; 108, 111}'], + ]; + } +} diff --git a/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/UnicodeTest.php b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/UnicodeTest.php new file mode 100644 index 0000000000..fa36701f69 --- /dev/null +++ b/tests/PhpSpreadsheetTests/Calculation/Functions/TextData/UnicodeTest.php @@ -0,0 +1,56 @@ +mightHaveException($expectedResult); + $sheet = $this->getSheet(); + if ($character === 'omitted') { + $sheet->getCell('B1')->setValue('=UNICODE()'); + } else { + $this->setCell('A1', $character); + $sheet->getCell('B1')->setValue('=UNICODE(A1)'); + } + $result = $sheet->getCell('B1')->getCalculatedValue(); + self::assertEquals($expectedResult, $result); + } + + public static function providerCODE(): array + { + return require 'tests/data/Calculation/TextData/CODE.php'; + } + + /** @param mixed[] $expectedResult */ + #[DataProvider('providerCodeArray')] + public function testCodeArray(array $expectedResult, string $array): void + { + $calculation = Calculation::getInstance(); + + $formula = "=UNICODE({$array})"; + $result = $calculation->calculateFormula($formula); + self::assertSame($expectedResult, $result); + } + + public static function providerCodeArray(): array + { + return [ + 'row vector' => [[[80, 72, 80]], '{"P", "H", "P"}'], + 'column vector' => [[[80], [72], [80]], '{"P"; "H"; "P"}'], + 'matrix' => [[[89, 111], [108, 111]], '{"Y", "o"; "l", "o"}'], + ]; + } +} diff --git a/tests/data/Calculation/TextData/CHAR.php b/tests/data/Calculation/TextData/CHAR.php index 400edd50dd..d31890e0a2 100644 --- a/tests/data/Calculation/TextData/CHAR.php +++ b/tests/data/Calculation/TextData/CHAR.php @@ -28,41 +28,77 @@ 126, ], [ - 'Á', + ['Á', 'Á', '¡'], 193, ], [ - 'ÿ', + ['ÿ', 'ÿ', 'ˇ'], 255, 
], [ - '#VALUE!', + ['#VALUE!', 'Ā'], 256, ], [ - '#VALUE!', // '⽇', + ['#VALUE!', '⽇'], 12103, ], [ - '#VALUE!', // 'œ', + ['#VALUE!', 'œ'], 0x153, ], [ - '#VALUE!', // 'ƒ', + ['#VALUE!', 'ƒ'], 0x192, ], [ - '#VALUE!', // '℅', + ['#VALUE!', '℅'], 0x2105, ], [ - '#VALUE!', // '∑', + ['#VALUE!', '∑'], 0x2211, ], [ - '#VALUE!', // '†', + ['#VALUE!', '†'], 0x2020, ], + 'example 1 different location all 3' => [ + ['†', mb_chr(134, 'UTF-8'), 'Ü'], + 134, + ], + 'example 2 different location all 3' => [ + ['€', mb_chr(128, 'UTF-8'), 'Ä'], + 128, + ], + 'non-ascii same win-1252 vs unicode, different mac' => [ + ['Û', 'Û', '€'], + 219, + ], + 'after currency symbol placeholder' => [ + ['Ü', 'Ü', '‹'], + 220, + ], + 'Example 3 where MAC differs from others' => [ + ['Ð', 'Ð', '–'], + 0xD0, + ], + 'last assigned Unicode character' => [ + ['#VALUE!', mb_chr(0x10FFFD, 'UTF-8')], + 0x10FFFD, + ], + 'highest possible code point' => [ + ['#VALUE!', '#N/A'], + 0x10FFFF, + ], + 'above highest possible code point' => [ + '#VALUE!', + 0x110000, + ], + 'nbsp for win/uni, dagger for Mac' => [ + ["\u{A0}", "\u{A0}", '†'], + 160, + ], 'omitted argument' => ['exception'], 'non-printable' => ["\x02", 2], 'bool argument' => ["\x01", true], diff --git a/tests/data/Calculation/TextData/CODE.php b/tests/data/Calculation/TextData/CODE.php index 62bd213b2d..32bb0ff198 100644 --- a/tests/data/Calculation/TextData/CODE.php +++ b/tests/data/Calculation/TextData/CODE.php @@ -2,6 +2,10 @@ declare(strict_types=1); +// Used to test both CODE and UNICODE. +// If expected result is array, 1st entry is for CODE, 2nd for UNICODE, +// and 3rd for CODE using MACROMAN. 
+ return [ [ '#VALUE!', @@ -48,28 +52,44 @@ '£125.00', ], [ - 12103, + [63, 12103], '⽇', ], [ - 0x153, + [156, 0x153, 207], 'œ', ], [ - 0x192, + [131, 0x192, 196], 'ƒ', ], [ - 0x2105, + [63, 0x2105], '℅', ], [ - 0x2211, + [63, 0x2211, 183], '∑', ], [ - 0x2020, + [134, 0x2020, 160], '†', ], + [ + [128, 8364, 219], + '€', + ], + [ + [220, 220, 134], + 'Ü', + ], + 'non-ascii but same win-1252 vs unicode' => [ + [0xD0, 0xD0, 63], + 'Ð', + ], + 'ascii control character' => [ + 2, + "\x02", + ], 'omitted argument' => ['exception'], ]; diff --git a/tests/data/Calculation/TextData/OpenOffice.php b/tests/data/Calculation/TextData/OpenOffice.php index 5c1c4a0356..7618300a26 100644 --- a/tests/data/Calculation/TextData/OpenOffice.php +++ b/tests/data/Calculation/TextData/OpenOffice.php @@ -4,11 +4,12 @@ return [ 'OO allows CHAR(0)' => ["\x00", '=CHAR(0)'], - 'OO treats CODE(bool) as 0/1' => ['48', '=CODE(FALSE)'], + 'OO treats CODE(bool) as 0/1' => [48, '=CODE(FALSE)'], + 'OO treats UNICODE(bool) as 0/1' => [49, '=UNICODE(TRUE)'], 'OO treats bool as string as 0/1 to REPT' => ['111', '=REPT(true, 3)'], 'OO treats bool as string as 0/1 to CLEAN' => ['0', '=CLEAN(false)'], 'OO treats bool as string as 0/1 to TRIM' => ['1', '=TRIM(true)'], - 'OO treats bool as string as 0/1 to LEN' => ['1', '=LEN(false)'], + 'OO treats bool as string as 0/1 to LEN' => [1, '=LEN(false)'], 'OO treats bool as string as 0/1 to EXACT parm 1' => [true, '=EXACT(true, 1)'], 'OO treats bool as string as 0/1 to EXACT parm 2' => [true, '=EXACT(0, false)'], 'OO treats bool as string as 0/1 to FIND parm 1' => [2, '=FIND(true, "210")'], @@ -24,7 +25,7 @@ 'OO treats false as 0 SUBSTITUTE parm 1' => ['6', '=SUBSTITUTE(true, "1", "6")'], 'OO treats true as 1 SUBSTITUTE parm 4' => ['zbcade', '=SUBSTITUTE("abcade", "a", "z", true)'], 'OO TEXT boolean in lieu of string' => ['0', '=TEXT(false, "@")'], - 'OO VALUE boolean in lieu of string' => ['0', '=VALUE(false)'], - 'OO NUMBERVALUE boolean in lieu of string' => 
['1', '=NUMBERVALUE(true)'], + 'OO VALUE boolean in lieu of string' => [0.0, '=VALUE(false)'], + 'OO NUMBERVALUE boolean in lieu of string' => [1.0, '=NUMBERVALUE(true)'], 'OO TEXTJOIN boolean in lieu of string' => ['1-0-1', '=TEXTJOIN("-", true, true, false, true)'], ];