Skip to content

Commit

Permalink
Move Tokenizer and Parser to core class files
Browse files Browse the repository at this point in the history
  • Loading branch information
dregad committed Apr 30, 2016
1 parent 8107ea5 commit 9d78247
Show file tree
Hide file tree
Showing 3 changed files with 392 additions and 334 deletions.
335 changes: 1 addition & 334 deletions adm_config_set.php
Expand Up @@ -115,7 +115,7 @@
case CONFIG_TYPE_COMPLEX:
default:
try {
$t_parser = new Parser( $f_value );
$t_parser = new ConfigParser( $f_value );
$t_value = $t_parser->parse();
}
catch (Exception $e) {
Expand All @@ -132,339 +132,6 @@

print_successful_redirect( 'adm_config_report.php' );

/**
 * Class Tokenizer.
 * Uses PHP's internal token_get_all() function to split a piece of code
 * into tokens, which can then be inspected and consumed one at a time
 * from the front of the list.
 */
class Tokenizer
{
	/**
	 * @var array $tokens Remaining (not yet consumed) tokens, indexed from 0.
	 *                    Each entry is either a single-character string or
	 *                    an array as returned by token_get_all().
	 */
	protected $tokens;

	/**
	 * Tokenizer constructor.
	 * Builds the token array from given code, discarding whitespace,
	 * comments and trailing semicolons
	 * @param string $p_code PHP code to tokenize
	 * @throws Exception if there are no tokens to process
	 * @throws Exception if given code is not valid
	 */
	public function __construct( $p_code )
	{
		# Only reject genuinely empty input: empty() would also reject the
		# perfectly valid code '0'
		if( !is_scalar( $p_code ) || (string)$p_code === '' ) {
			throw new Exception( 'No more tokens' );
		}
		$p_code = (string)$p_code;

		# Check syntax to make sure we get valid PHP code
		# prepend 'return' statement to ensure the code is not actually executed
		# Suppress errors as we can't capture STDERR with ob_ functions
		# NOTE(review): eval() is applied to externally supplied text here.
		# The leading 'return' keeps the statements from running, but the code
		# is still compiled - presumably declarations could be registered;
		# consider a parse-only check instead (TODO confirm).
		try {
			$t_result = @eval( 'return; ' . $p_code . ';' );
		}
		catch( ParseError $e ) {
			# PHP 7+ reports parse errors by throwing rather than returning
			# false; convert to the Exception type this class documents.
			# On older versions the class does not exist and this clause is
			# simply never matched.
			throw new Exception( 'Syntax error', 0, $e );
		}
		if( $t_result === false ) {
			throw new Exception( 'Syntax error' );
		}

		# Strip tokens that carry no meaning (whitespace and comments).
		# Building a fresh list keeps the indexes contiguous from 0.
		$t_ignored = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );
		$t_tokens = array();
		foreach( token_get_all( '<?php ' . $p_code ) as $t_token ) {
			if( is_array( $t_token ) && in_array( $t_token[0], $t_ignored, true ) ) {
				continue;
			}
			$t_tokens[] = $t_token;
		}

		# Get rid of the opening '<?php' tag we added
		array_shift( $t_tokens );

		# Remove any trailing ';'
		# end() yields false on an empty list, which ends the loop
		while( end( $t_tokens ) === ';' ) {
			array_pop( $t_tokens );
		}

		$this->tokens = $t_tokens;
	}

	/**
	 * Return true if we're at the end of the token array.
	 * @return bool
	 */
	public function is_empty() {
		return empty( $this->tokens );
	}

	/**
	 * Retrieves the next token without consuming it.
	 * @return mixed token (single-character string, or token array)
	 * @throws Exception if there are no more tokens to process
	 */
	public function get() {
		if( $this->is_empty() ) {
			throw new Exception( 'No more tokens' );
		}
		return $this->tokens[0];
	}

	/**
	 * Consume the next token and return it.
	 * @return mixed token
	 * @throws Exception if there are no more tokens to process
	 */
	public function pop() {
		$t_token = $this->get();
		# array_shift() re-indexes, so the next token is at index 0 again
		array_shift( $this->tokens );
		return $t_token;
	}

	/**
	 * Get the current token's type.
	 * @link http://php.net/manual/en/tokens.php
	 * @return int|string Token number or character
	 * @throws Exception if there are no more tokens to process
	 */
	public function type() {
		$t_token = $this->get();
		return is_array( $t_token ) ? $t_token[0] : $t_token;
	}

	/**
	 * Get the current token's value.
	 * @return string Token text as found in the code, or the character
	 *                itself for single-character tokens
	 * @throws Exception if there are no more tokens to process
	 */
	public function value() {
		$t_token = $this->get();
		return is_array( $t_token ) ? $t_token[1] : $t_token;
	}

	/**
	 * Return true if the next token matches the given value.
	 * The comparison is strict and made against the token's type.
	 * @param int|string $p_value value to check
	 * @return bool
	 * @throws Exception if there are no more tokens to process
	 */
	public function matches( $p_value ) {
		return $this->type() === $p_value;
	}

	/**
	 * Ensures the next token matches the value and consumes it.
	 * @param int|string $p_value value to check
	 * @throws Exception if token does not match
	 */
	public function ensure_matches( $p_value ) {
		if( !$this->matches( $p_value ) ) {
			# Report numeric token types by their symbolic name
			if( is_int( $p_value ) ) {
				$p_value = token_name( $p_value );
			}
			throw new Exception(
				"Invalid token: got '" . $this->value() . "', expected '$p_value'"
			);
		}
		$this->pop();
	}

	/**
	 * Prints the tokens array.
	 * @TODO For debugging purposes only, should be deleted
	 */
	public function debug_output()
	{
		if( count( $this->tokens ) == 0 ) {
			echo "Empty !\n";
		}
		foreach( $this->tokens as $t_id => $t_token ) {
			echo "$t_id - ";
			if( is_array( $t_token ) ) {
				echo token_name( $t_token[0] ) . " " . var_export( $t_token[1], true ) . "\n";
			} else {
				# One token per line, same as for array tokens
				echo $t_token . "\n";
			}
		}
	}

}

/**
 * Class Parser.
 * Simple PHP code parser for scalar and array types.
 * Converts a piece of PHP code describing a literal value into that value,
 * by walking the token stream produced by Tokenizer.
 */
class Parser
{
	/**
	 * @var Tokenizer $tokens Token stream for the code being parsed
	 */
	protected $tokens;

	/**
	 * Parser constructor.
	 * @param string $p_code PHP code to parse
	 * @throws Exception if the Tokenizer rejects the code
	 */
	public function __construct( $p_code ) {
		$this->tokens = new Tokenizer( $p_code );
	}

	/**
	 * Parse the code for a variable assignment.
	 * Handles scalar types, and various array types (simple, associative,
	 * multi-dimensional)
	 * @return mixed variable
	 * @throws Exception when there are unexpected or extra tokens
	 */
	public function parse() {
		if( $this->tokens->matches( T_ARRAY ) ) {
			$t_result = $this->process_array();
		} elseif( $this->is_value_start() ) {
			$t_result = $this->process_value();
		} else {
			throw new Exception( 'Unexpected token' );
		}

		# Make sure we have processed all tokens; this applies to scalars as
		# much as to arrays (e.g. "1 + 2" must not silently evaluate to 1)
		if( !$this->tokens->is_empty() ) {
			throw new Exception( "Extra tokens" );
		}

		return $t_result;
	}

	/**
	 * Check whether the next token can begin a scalar value.
	 * That is a string, a string literal / constant name, a number, or the
	 * sign preceding a number.
	 * @return bool
	 * @throws Exception if there are no more tokens to process
	 */
	protected function is_value_start() {
		$t_starters = array(
			T_CONSTANT_ENCAPSED_STRING,
			T_STRING,
			T_LNUMBER,
			T_DNUMBER,
			'-',
			'+',
		);
		# Strict check: token types mix integers and 1-character strings
		return in_array( $this->tokens->type(), $t_starters, true );
	}

	/**
	 * Recursively process array declarations.
	 * @return array
	 * @throws Exception when there's an invalid token
	 */
	protected function process_array() {
		$t_array = array();
		$t_count = 0;

		$this->tokens->ensure_matches( T_ARRAY );
		$this->tokens->ensure_matches( '(' );

		# Loop until we reach the end of the array
		while( !$this->tokens->matches( ')' ) ) {
			# A comma is required before each element except the first one
			if( $t_count > 0 ) {
				$this->tokens->ensure_matches( ',' );
			}

			if( $this->tokens->matches( T_ARRAY ) ) {
				# Nested array
				$t_array[] = $this->process_array();
			} elseif( $this->is_value_start() ) {
				# Value, or key of a 'key => value' pair
				$t_value = $this->process_value();

				if( $this->tokens->matches( T_DOUBLE_ARROW ) ) {
					# key => value
					$this->tokens->pop();
					if( $this->tokens->matches( T_ARRAY ) ) {
						$t_array[$t_value] = $this->process_array();
					} else {
						$t_array[$t_value] = $this->process_value();
					}
				} else {
					# Simple value
					$t_array[] = $t_value;
				}
			} elseif( !$this->tokens->matches( ')' ) ) {
				# Anything but the closing parenthesis (which covers the
				# trailing ',' case) is an error
				throw new Exception( "Invalid token '" . $this->tokens->value() . "'" );
			}

			$t_count++;
		}
		$this->tokens->ensure_matches( ')' );

		return $t_array;
	}

	/**
	 * Process a scalar value.
	 * Handles string literals including defined constants, quoted strings,
	 * and optionally signed integer / floating point numbers
	 * @see constant_replace()
	 * @return mixed
	 * @throws Exception when there's an unexpected value
	 */
	protected function process_value() {
		# String literals
		if( $this->tokens->matches( T_STRING ) ) {
			$t_token = $this->tokens->pop();
			$t_name = $t_token[1];

			# PHP Standard string literals
			switch( strtolower( $t_name ) ) {
				case 'null':
					return null;
				case 'true':
					return true;
				case 'false':
					return false;
			}

			# Defined constants
			# Comparing the constant's value against its name with a loose
			# '!=' misses constants whose value compares equal to the name
			# (e.g. true, or 0 on PHP < 8), so test for existence instead
			if( defined( $t_name ) ) {
				return constant_replace( $t_name );
			}

			throw new Exception( "Unknown string literal '$t_name'" );
		}

		# Strings
		if( $this->tokens->matches( T_CONSTANT_ENCAPSED_STRING ) ) {
			$t_token = $this->tokens->pop();
			# Drop the enclosing quotes, then the backslashes
			# NOTE(review): stripslashes() removes every backslash, so escape
			# sequences such as "\n" are not interpreted - confirm this is
			# the intended behavior
			return (string)stripslashes( substr( $t_token[1], 1, -1 ) );
		}

		# Numbers
		# Consume any leading sign(s), flipping the multiplier on each '-'
		$t_negate = 1;
		while( $this->tokens->matches( '-' ) || $this->tokens->matches( '+' ) ) {
			if( $this->tokens->pop() === '-' ) {
				$t_negate = -$t_negate;
			}
		}

		# Integers
		if( $this->tokens->matches( T_LNUMBER ) ) {
			$t_token = $this->tokens->pop();
			# Base 0 lets intval() honor hexadecimal, octal and binary
			# prefixes, which a plain (int) cast would read as 0 or decimal;
			# digit separators are removed first
			$t_digits = str_replace( '_', '', $t_token[1] );
			return $t_negate * intval( $t_digits, 0 );
		}

		# Floating point
		if( $this->tokens->matches( T_DNUMBER ) ) {
			$t_token = $this->tokens->pop();
			return $t_negate * (float)str_replace( '_', '', $t_token[1] );
		}

		# Anything else
		throw new Exception( "Unexpected value" );
	}
}

/**
* Checks whether the passed string is a constant and returns its value
* if it is, or the string itself if it is not
Expand Down

0 comments on commit 9d78247

Please sign in to comment.