Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TASK: Implement alternative approach to lexical analysis #34

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/setup-php/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ runs:
with:
php-version: ${{ inputs.php-version }}
coverage: xdebug
ini-values: zend.assertions=1

- id: composer-cache
run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT
Expand Down
2 changes: 1 addition & 1 deletion scripts/analyse
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/env bash
#!/usr/bin/env bash

##
## Usage (plain):
Expand Down
3 changes: 2 additions & 1 deletion scripts/test
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/env bash
#!/usr/bin/env bash

##
## Usage (plain):
Expand All @@ -17,5 +17,6 @@
--display-deprecations \
--display-errors \
--display-notices \
--display-warnings \
--coverage-html build/coverage-report \
--coverage-filter src $@
2 changes: 2 additions & 0 deletions src/Language/AST/Node/BinaryOperation/BinaryOperator.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

enum BinaryOperator: string
{
case NULLISH_COALESCE = 'NULLISH_COALESCE';

case AND = 'AND';
case OR = 'OR';

Expand Down
2 changes: 0 additions & 2 deletions src/Language/AST/Node/IntegerLiteral/IntegerFormat.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

namespace PackageFactory\ComponentEngine\Language\AST\Node\IntegerLiteral;

use PackageFactory\ComponentEngine\Parser\Tokenizer\TokenType;

enum IntegerFormat: string
{
case BINARY = 'BINARY';
Expand Down
32 changes: 32 additions & 0 deletions src/Language/AST/Node/TemplateLiteral/TemplateLiteralLine.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

/**
* PackageFactory.ComponentEngine - Universal View Components for PHP
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral;

/**
 * Immutable AST value describing a single line of a template literal.
 *
 * Carries the line's indentation together with the segments found on it.
 * NOTE(review): the unit of $indentation (characters, columns, ...) is not
 * visible from this class - confirm against the parser that constructs it.
 */
final class TemplateLiteralLine
{
    /**
     * @param int $indentation Indentation recorded for this line
     * @param TemplateLiteralSegments $segments Segments that make up this line
     */
    public function __construct(
        public readonly int $indentation,
        public readonly TemplateLiteralSegments $segments,
    ) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,17 @@

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Test\Unit\Parser\Tokenizer;
namespace PackageFactory\ComponentEngine\Language\AST\Node\TemplateLiteral;

use PackageFactory\ComponentEngine\Parser\Source\Source;
use PackageFactory\ComponentEngine\Parser\Tokenizer\Token;
use PackageFactory\ComponentEngine\Parser\Tokenizer\Tokenizer;

final class Fixtures
final class TemplateLiteralLines
{
/**
* @param string $sourceAsString
* @return \Iterator<mixed,Token>
* @var TemplateLiteralLine[]
*/
public static function tokens(string $sourceAsString): \Iterator
{
$source = Source::fromString($sourceAsString);
$tokenizer = Tokenizer::fromSource($source);
public readonly array $items;

return $tokenizer->getIterator();
public function __construct(TemplateLiteralLine ...$items)
{
$this->items = $items;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ final class TemplateLiteralNode extends Node
{
public function __construct(
public readonly Range $rangeInSource,
public readonly TemplateLiteralSegments $segments
public readonly int $indentation,
public readonly TemplateLiteralLines $lines
) {
}
}
112 changes: 112 additions & 0 deletions src/Language/Lexer/Buffer/Buffer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
<?php

/**
* PackageFactory.ComponentEngine - Universal View Components for PHP
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Language\Lexer\Buffer;

use PackageFactory\ComponentEngine\Parser\Source\Position;
use PackageFactory\ComponentEngine\Parser\Source\Range;

final class Buffer
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'm just wondering what the pros and cons of making this thing mutable are ...

On the one hand, the lexer can expose it as a public readonly member, but methods like overwrite and reset will probably always feel smelly. Then again, this mutable buffer might be a performance optimization, as we don't need a new object every time.

{
/** Position at which the currently buffered contents begin; moved by flush(). */
private Position $start;
/** Line number reported by getEnd(); append() sets it to the line of the character just appended. */
private int $endLineNumber;
/** Line number at which the next appended character will be recorded. */
private int $nextEndLineNumber;
/** Column number reported by getEnd(); append() sets it to the column of the character just appended. */
private int $endColumnNumber;
/** Column number at which the next appended character will be recorded. */
private int $nextEndColumnNumber;
/** Characters appended since the last flush() or reset(). */
private string $contents;

/**
 * Creates an empty buffer: no contents, start at Position::zero() and all
 * line/column counters at 0.
 */
public function __construct()
{
    $this->contents = '';
    $this->start = Position::zero();

    $this->endLineNumber = $this->nextEndLineNumber = 0;
    $this->endColumnNumber = $this->nextEndColumnNumber = 0;
}

/**
 * Returns the position at which the currently buffered contents begin.
 *
 * @return Position
 */
public function getStart(): Position
{
return $this->start;
}

/**
 * Builds the end position of the buffer from the tracked end line and
 * end column numbers.
 *
 * @return Position
 */
public function getEnd(): Position
{
    $lineNumber = $this->endLineNumber;
    $columnNumber = $this->endColumnNumber;

    return Position::from($lineNumber, $columnNumber);
}

/**
 * Returns the range spanned by the buffer, from getStart() to getEnd().
 *
 * @return Range
 */
public function getRange(): Range
{
    $from = $this->getStart();
    $to = $this->getEnd();

    return Range::from($from, $to);
}

/**
 * Returns everything appended since the last flush() or reset().
 *
 * @return string
 */
public function getContents(): string
{
return $this->contents;
}

/**
 * Appends one character to the buffer and advances the position tracking.
 *
 * Passing null is a no-op. Each call counts as exactly one column step,
 * whatever the byte length of $character; a "\n" moves the upcoming position
 * to column 0 of the following line instead.
 *
 * @param string|null $character Character to append, or null to do nothing
 * @return void
 */
public function append(?string $character): void
{
    if ($character === null) {
        return;
    }

    // The appended character occupies the slot the "next" cursor pointed at.
    $this->endLineNumber = $this->nextEndLineNumber;
    $this->endColumnNumber = $this->nextEndColumnNumber;
    $this->contents .= $character;

    $isLineBreak = $character === "\n";
    if ($isLineBreak) {
        $this->nextEndLineNumber += 1;
    }
    $this->nextEndColumnNumber = $isLineBreak ? 0 : $this->nextEndColumnNumber + 1;
}

/**
 * Discards the buffered contents and restarts the buffer at the position
 * where the next character would be recorded.
 *
 * Afterwards start and end both equal that upcoming position.
 *
 * @return void
 */
public function flush(): void
{
    $this->endLineNumber = $this->nextEndLineNumber;
    $this->endColumnNumber = $this->nextEndColumnNumber;

    $this->start = Position::from($this->endLineNumber, $this->endColumnNumber);
    $this->contents = '';
}

/**
 * Copies the complete state of this buffer INTO the given buffer.
 *
 * Note the direction: $other is the one being overwritten; this instance is
 * left untouched.
 *
 * @param Buffer $other Buffer that receives this buffer's state
 * @return void
 */
public function overwrite(Buffer $other): void
{
    $other->contents = $this->contents;
    $other->start = $this->start;

    $other->endLineNumber = $this->endLineNumber;
    $other->endColumnNumber = $this->endColumnNumber;

    $other->nextEndLineNumber = $this->nextEndLineNumber;
    $other->nextEndColumnNumber = $this->nextEndColumnNumber;
}

/**
 * Drops the buffered contents and rewinds all position tracking back to the
 * start position; the start position itself is kept.
 *
 * @return void
 */
public function reset(): void
{
    $origin = $this->start;

    $this->contents = '';
    $this->nextEndLineNumber = $origin->lineNumber;
    $this->endLineNumber = $this->nextEndLineNumber;
    $this->nextEndColumnNumber = $origin->columnNumber;
    $this->endColumnNumber = $this->nextEndColumnNumber;
}
}
85 changes: 85 additions & 0 deletions src/Language/Lexer/CharacterStream/CharacterStream.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
<?php

/**
* PackageFactory.ComponentEngine - Universal View Components for PHP
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream;

/**
 * Forward-only cursor over a source string that steps one character at a time.
 *
 * A character is the byte under the cursor plus up to three following bytes,
 * chosen purely from the value of the first byte (>= 0x80, >= 0xe0, >= 0xf0).
 * The byte sequence itself is not validated.
 *
 * @internal
 */
final class CharacterStream
{
    /** Offset of the next byte in $source that has not been consumed yet. */
    private int $byte;

    /**
     * Character currently under the cursor, or null once the end is reached.
     * Starts as '' (not null) so that the first next() call in the constructor
     * is not mistaken for "already at end".
     */
    private ?string $characterUnderCursor = '';

    /**
     * @param string $source The string to stream characters from
     */
    public function __construct(private readonly string $source)
    {
        $this->byte = 0;
        $this->next();
    }

    /**
     * Advances the cursor to the next character; does nothing once the end of
     * the source has been reached.
     *
     * @return void
     */
    public function next(): void
    {
        if ($this->isEnd()) {
            return;
        }

        // The offset is consumed even when it lies past the end of the source.
        $offset = $this->byte++;
        if (!isset($this->source[$offset])) {
            $this->characterUnderCursor = null;
            return;
        }

        $character = $this->source[$offset];
        $leadByte = ord($character);

        // Number of additional bytes to pull in, decided by the lead byte alone.
        $trailingBytes = match (true) {
            $leadByte >= 0xf0 => 3,
            $leadByte >= 0xe0 => 2,
            $leadByte >= 0x80 => 1,
            default => 0,
        };

        for ($i = 0; $i < $trailingBytes; $i++) {
            // A sequence truncated by the end of the source contributes nothing.
            $character .= $this->source[$this->byte++] ?? '';
        }

        $this->characterUnderCursor = $character;
    }

    /**
     * Returns the character under the cursor, or null at the end of the source.
     *
     * @return string|null
     */
    public function current(): ?string
    {
        return $this->characterUnderCursor;
    }

    /**
     * Tells whether the cursor has moved past the last character.
     *
     * @return bool
     */
    public function isEnd(): bool
    {
        return null === $this->characterUnderCursor;
    }

    /**
     * Copies this stream's cursor state INTO the given stream; this instance
     * is left untouched. The source string of $other is not changed.
     *
     * @param CharacterStream $other Stream that receives this stream's cursor state
     * @return void
     */
    public function overwrite(CharacterStream $other): void
    {
        $other->characterUnderCursor = $this->characterUnderCursor;
        $other->byte = $this->byte;
    }

    /**
     * Returns the character under the cursor followed by everything in the
     * source that has not been consumed yet.
     *
     * @return string
     */
    public function getRemainder(): string
    {
        $head = $this->characterUnderCursor ?? '';
        $tail = substr($this->source, $this->byte);

        return $head . $tail;
    }
}
Loading
Loading