From 252675cc908032aba9fd17e446c28180412339f4 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Wed, 15 May 2024 18:00:02 +0200 Subject: [PATCH] Tokenizers/Comment: bug fix - empty docblock This commit fixes an edge case tokenizer bug, where a - completely empty, not even whitespace - _DocBlock_, would not be tokenized correctly. Without this commit, the `/***/` code snippet was tokenized as: ``` 13 | L10 | C 1 | CC 0 | ( 0) | T_DOC_COMMENT_OPEN_TAG | [ 5]: /***/ 14 | L10 | C 6 | CC 0 | ( 0) | T_DOC_COMMENT_CLOSE_TAG | [ 0]: ``` With the fix applied, it will be tokenized as: ``` 13 | L10 | C 1 | CC 0 | ( 0) | T_DOC_COMMENT_OPEN_TAG | [ 3]: /** 14 | L10 | C 4 | CC 0 | ( 0) | T_DOC_COMMENT_CLOSE_TAG | [ 2]: */ ``` --- src/Tokenizers/Comment.php | 13 +++++++--- .../Comment/SingleLineDocBlockTest.inc | 3 +++ .../Comment/SingleLineDocBlockTest.php | 26 ++++++++++++++++++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/Tokenizers/Comment.php b/src/Tokenizers/Comment.php index 335e296cb9..523ce9e41a 100644 --- a/src/Tokenizers/Comment.php +++ b/src/Tokenizers/Comment.php @@ -41,9 +41,16 @@ public function tokenizeString($string, $eolChar, $stackPtr) extra star when they are used for function and class comments. */ - $char = ($numChars - strlen(ltrim($string, '/*'))); - $openTag = substr($string, 0, $char); - $string = ltrim($string, '/*'); + $char = ($numChars - strlen(ltrim($string, '/*'))); + $lastChars = substr($string, -2); + if ($char === $numChars && $lastChars === '*/') { + // Edge case: docblock without whitespace or contents. + $openTag = substr($string, 0, -2); + $string = $lastChars; + } else { + $openTag = substr($string, 0, $char); + $string = ltrim($string, '/*'); + } $tokens[$stackPtr] = [ 'content' => $openTag, diff --git a/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc b/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc index 923e0fc1b2..88b05ea43c 100644 --- a/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc +++ b/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.inc @@ -3,6 +3,9 @@ /* testEmptyBlockCommentNoWhiteSpace */ /**/ +/* testEmptyDocblockNoWhiteSpace */ +/***/ + /* testEmptyDocblockWithWhiteSpace */ /** */ diff --git a/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.php b/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.php index 14c59c607a..e90b573d23 100644 --- a/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.php +++ b/tests/Core/Tokenizer/Comment/SingleLineDocBlockTest.php @@ -28,6 +28,11 @@ final class SingleLineDocBlockTest extends CommentTestCase public static function dataDocblockOpenerCloser() { return [ + 'Single line docblock: empty, no whitespace' => [ + 'marker' => '/* testEmptyDocblockNoWhiteSpace */', + 'closerOffset' => 1, + 'expectedTags' => [], + ], 'Single line docblock: only whitespace' => [ 'marker' => '/* testEmptyDocblockWithWhiteSpace */', 'closerOffset' => 2, @@ -79,12 +84,31 @@ public function testEmptyBlockCommentNoWhiteSpace() /** - * Verify tokenization of an empty, single line DocBlock. + * Verify tokenization of an empty, single line DocBlock without whitespace between the opener and closer. * * @phpcs:disable Squiz.Arrays.ArrayDeclaration.SpaceBeforeDoubleArrow -- Readability is better with alignment. * * @return void */ + public function testEmptyDocblockNoWhiteSpace() + { + $expectedSequence = [ + [T_DOC_COMMENT_OPEN_TAG => '/**'], + [T_DOC_COMMENT_CLOSE_TAG => '*/'], + ]; + + $target = $this->getTargetToken('/* '.__FUNCTION__.' */', T_DOC_COMMENT_OPEN_TAG); + + $this->checkTokenSequence($target, $expectedSequence); + + }//end testEmptyDocblockNoWhiteSpace() + + + /** + * Verify tokenization of an empty, single line DocBlock. + * + * @return void + */ public function testEmptyDocblockWithWhiteSpace() { $expectedSequence = [