Skip to content

Commit

Permalink
attempt to allow look-behind assertions in tokens
Browse files Browse the repository at this point in the history
Instead of consuming the matched text from the input, the lexer now advances
an offset, and each regex is matched against the full text starting at that offset.

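Note: since the '^' anchor is removed from the preg_match pattern, a regex may scan ahead and match
later in the text, only for that match to be rejected because it does not start at the offset; this can degrade performance.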
  • Loading branch information
CircleCode committed Jun 25, 2013
1 parent 71b32cb commit 46dda6f
Showing 1 changed file with 20 additions and 17 deletions.
37 changes: 20 additions & 17 deletions Llk/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,15 @@ public function lexMe ( $text, Array $tokens ) {
$tokenized = array();
$this->_lexerState = 'default';

while(0 < strlen($this->_text)) {
while($offset < strlen($this->_text)) {

$nextToken = $this->nextToken();
$nextToken = $this->nextToken($offset);

if(null === $nextToken)
throw new \Hoa\Compiler\Exception\UnrecognizedToken(
'Unrecognized token "%s" at line 1 and column %d:' .
"\n" . '%s' . "\n" . str_repeat(' ', $offset) . '',
0, array($this->_text[0], $offset + 1, $text),
0, array($this->_text[$offset], $offset + 1, $text),
1, $offset
);

Expand All @@ -125,7 +125,6 @@ public function lexMe ( $text, Array $tokens ) {
}

$offset += $nextToken['length'];
$this->_text = substr($this->_text, $nextToken['length']);
}

$tokenized[] = array(
Expand All @@ -146,7 +145,7 @@ public function lexMe ( $text, Array $tokens ) {
* @access protected
* @return array
*/
protected function nextToken ( ) {
protected function nextToken ( $offset=0 ) {

$tokenArray = &$this->_tokens[$this->_lexerState];

Expand All @@ -160,7 +159,7 @@ protected function nextToken ( ) {
$nextState = $this->_lexerState;
}

$out = $this->matchLexeme($lexeme, $regexp);
$out = $this->matchLexeme($lexeme, $regexp, $offset);

if(null !== $out) {

Expand All @@ -184,22 +183,26 @@ protected function nextToken ( ) {
* @return array
* @throw \Hoa\Compiler\Exception\Lexer
*/
protected function matchLexeme ( $lexeme, $regexp ) {
protected function matchLexeme ( $lexeme, $regexp, $offset=0 ) {

$_regexp = str_replace('#', '\#', $regexp);

if(0 !== preg_match('#^(?:' . $_regexp . ')#u', $this->_text, $matches)) {
if(0 !== preg_match('#(?:' . $_regexp . ')#u', $this->_text, $matches, PREG_OFFSET_CAPTURE, $offset)) {

if('' === $matches[0])
throw new \Hoa\Compiler\Exception\Lexer(
'A lexeme must not match an empty value, which is the ' .
'case of "%s" (%s).', 1, array($lexeme, $regexp));
$match = $matches[0];
if($offset === $match[1]){
if ('' === $match[0])
throw new \Hoa\Compiler\Exception\Lexer(
'A lexeme must not match an empty value, which is the ' .
'case of "%s" (%s).', 1, array($lexeme, $regexp));

return array(
'token' => $lexeme,
'value' => $match[0],
'length' => strlen($match[0])
);
}

return array(
'token' => $lexeme,
'value' => $matches[0],
'length' => strlen($matches[0])
);
}

return null;
Expand Down

0 comments on commit 46dda6f

Please sign in to comment.