From ca0efb775ddb0440bb066cb583e72f883ca10aa0 Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Sat, 1 Mar 2025 11:58:11 +0100 Subject: [PATCH 1/8] first step for unit parser --- classes/local/parser.php | 6 +- classes/local/shunting_yard.php | 127 ++++++++++++++++ classes/local/token.php | 3 + classes/local/unit_parser.php | 256 ++++++++++++++++++++++++++++++++ lang/en/qtype_formulas.php | 1 + tests/unit_parser_test.php | 176 ++++++++++++++++++++++ version.php | 2 +- 7 files changed, 567 insertions(+), 4 deletions(-) create mode 100644 classes/local/unit_parser.php create mode 100644 tests/unit_parser_test.php diff --git a/classes/local/parser.php b/classes/local/parser.php index c198b480..eec95421 100644 --- a/classes/local/parser.php +++ b/classes/local/parser.php @@ -33,7 +33,7 @@ class parser { protected array $tokenlist; /** @var int number of (raw) tokens */ - private int $count; + protected int $count; /** @var int position w.r.t. list of (raw) tokens */ private int $position = -1; @@ -98,7 +98,7 @@ private function parse_the_right_thing(token $token) { * * @return void */ - private function check_unbalanced_parens(): void { + protected function check_unbalanced_parens(): void { $parenstack = []; foreach ($this->tokenlist as $token) { $type = $token->type; @@ -399,7 +399,7 @@ private function peek(int $skip = 0): ?token { * * @return token|null */ - private function read_next(): ?token { + protected function read_next(): ?token { $nexttoken = $this->peek(); if ($nexttoken !== self::EOF) { $this->position++; diff --git a/classes/local/shunting_yard.php b/classes/local/shunting_yard.php index da309966..2ffd79dc 100644 --- a/classes/local/shunting_yard.php +++ b/classes/local/shunting_yard.php @@ -528,6 +528,133 @@ public static function infix_to_rpn(array $tokens): array { return $output; } + /** + * Translate unit expression from infix into RPN notation via Dijkstra's shunting yard algorithm, + * because this makes evaluation much easier. + * + * @param array $tokens the tokens forming the expression that is to be translated + * @return array + */ + public static function unit_infix_to_rpn($tokens): array { + $output = []; + $opstack = []; + + $lasttoken = null; + $lasttype = null; + $lastvalue = null; + foreach ($tokens as $token) { + $type = $token->type; + $value = $token->value; + + if (!is_null($lasttoken)) { + $lasttype = $lasttoken->type; + $lastvalue = $lasttoken->value; + } + + // Insert inplicit multiplication sign between two consecutive UNIT tokens. + // For accurate error reporting, the row and column number of the implicit + // multiplication token are copied over from the current token which triggered + // the multiplication. + $unitunit = ($lasttype === token::UNIT && $type === token::UNIT); + $unitparen = ($lasttype === token::UNIT && $type === token::OPENING_PAREN); + $parenunit = ($lasttype === token::CLOSING_PAREN && $type === token::UNIT); + $parenparen = ($lasttype === token::CLOSING_PAREN && $type === token::OPENING_PAREN); + if ($unitunit || $unitparen || $parenunit || $parenparen) { + // For backwards compatibility, division will have a lower precedence than multiplication, + // in order for J / m K to be interpreted as J / (m K). Instead of introducing a special + // 'unit multiplication' pseudo-operator, we simply increase the multiplication's precedence + // by one when flushing operators from the opstack. + self::flush_higher_precedence($opstack, self::get_precedence('*') + 1, $output); + $opstack[] = new token(token::OPERATOR, '*', $token->row, $token->column); + } + + // Two consecutive operators are only possible if the unary minus follows exponentiation. + // Note: We do not have to check whether the first of them is exponentiation, because we + // only allow - in the exponent anyway. + if ($type === token::OPERATOR && $lasttype === token::OPERATOR && $value !== '-') { + self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); + } + + switch ($type) { + // UNIT tokens go straight to the output queue. + case token::UNIT: + $output[] = $token; + break; + + // Numbers go to the output queue. + case token::NUMBER: + // If the last token was the unary minus, we multiply the number by -1 before + // sending it to the output queue. Afterwards, we can remove the minus from the opstack. + if ($lasttype === token::OPERATOR && $lastvalue === '-') { + $token->value = -$token->value; + array_pop($opstack); + } + $output[] = $token; + break; + + // Opening parentheses go straight to the operator stack. + case token::OPENING_PAREN: + $opstack[] = $token; + break; + + // A closing parenthesis means we flush all operators until we get to the + // matching opening parenthesis. + case token::CLOSING_PAREN: + // A closing parenthesis must not occur immediately after an operator. + if ($lasttype === token::OPERATOR) { + self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); + } + self::flush_until_paren($opstack, token::OPENING_PAREN, $output); + break; + + // Deal with all the possible operators... + case token::OPERATOR: + // Expressions must not start with an operator. + if (is_null($lasttoken)) { + self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); + } + // Operators must not follow an opening parenthesis, except for the unary minus. + if ($lasttype === token::OPENING_PAREN && $value !== '-') { + self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); + } + // Before fetching the precedence, we must translate ^ (caret) into **, because + // the ^ operator normally has a different meaning with lower precedence. + if ($value === '^') { + $value = '**'; + } + $thisprecedence = self::get_precedence($value); + // We artificially increase the precedence of the division operator, because + // legacy versions used implicit parens around the denominator, e. g. + // the expression J / m K would be interpreted as J / (m * K). This is consistent + // with what tools like Wolfram Alpha do, even though e. g. 1 / 2 3 would be read + // as 3/2 both by Formulas Question and Wolfram Alpha. And even if it were not, it + // is not possible to change that, because it could break existing questions. + if ($value === '*') { + $thisprecedence++; + } + // Flush operators with higher precedence, unless we have a unary minus, because + // it is not left-associative. + if ($value !== '-') { + self::flush_higher_precedence($opstack, $thisprecedence, $output); + } + // Put the operator on the stack. + $opstack[] = $token; + break; + + // If we still haven't dealt with the token, there must be a problem with the input. + default: + self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); + + } + + $lasttoken = $token; + } + // After last token, flush opstack. Last token must be either a number (in exponent), + // a closing parenthesis or a unit. + self::flush_all($opstack, $output); + return $output; + } + /** * Stop processing and indicate the human readable position (row/column) where the error occurred. * diff --git a/classes/local/token.php b/classes/local/token.php index 4e34c8c0..922dec20 100644 --- a/classes/local/token.php +++ b/classes/local/token.php @@ -123,6 +123,9 @@ class token { /** @var int used to designate a token storing an end-of-group marker (closing brace) */ const END_GROUP = 4194304; + /** @var int used to designate a token storing a unit */ + const UNIT = 8388608; + /** @var mixed the token's content, will be the name for identifiers */ public $value; diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php new file mode 100644 index 00000000..0b170817 --- /dev/null +++ b/classes/local/unit_parser.php @@ -0,0 +1,256 @@ +. + +namespace qtype_formulas\local; + +/* + +Notes about current implementation: + +- only 2 operators allowed: ^ for exponentiation and / for division +- only one / allowed +- no * allowed +- no parens allowed, except in exponent or around the *entire* denominator +- right side of / is considered in parens, even if not written, e.g. J/m*K --> J / (m*K) +- only units, no numbers except for exponents +- positive or negative exponents allowed +- negative exponents allowed with or without parens +- same unit not allowed more than once + +Future implementation, must be 100% backwards compatible + +- allow parens everywhere +- allow * for explicit multiplication of units +- still only allow one / +- still not allow same unit more than once +- if * is used after /, assume implicit parens, e. g. J / m * K --> J / (m * K) +- do not allow operators other than *, / and ^ as well as unary - (in exponents only) +- allow ** instead of ^ + +*/ + + +/** + * Parser for units for qtype_formulas + * + * @package qtype_formulas + * @copyright 2025 Philipp Imhof + * @license https://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ +class unit_parser extends parser { + + /** @var array list of used units */ + private array $unitlist = []; + + /** + * Create a unit parser class and have it parse a given input. The input can be given as a string, in + * which case it will first be sent to the lexer. If that step has already been made, the constructor + * also accepts a list of tokens. + * + * @param string|array $tokenlist list of tokens as returned from the lexer or input string + */ + public function __construct($tokenlist) { + // If the input is given as a string, run it through the lexer first. + if (is_string($tokenlist)) { + $lexer = new lexer($tokenlist); + $tokenlist = $lexer->get_tokens(); + } + $this->tokenlist = $tokenlist; + $this->count = count($tokenlist); + + // Check for unbalanced / mismatched parentheses. + $this->check_parens(); + + // Whether we have already seen a slash or the number one (except in exponents). + $seenslash = false; + $seenunit = false; + $inexponent = false; + foreach ($tokenlist as $token) { + // The use of functions is not permitted in units, so all identifiers will be classified + // as UNIT tokens. + if ($token->type === token::IDENTIFIER) { + // If inside an exponent, only numbers (and maybe the unary minus) are allowed. + if ($inexponent) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + // The same unit must not be used more than once. + if ($this->has_unit_been_used($token)) { + $this->die('Unit already used: ' . $token->value, $token); + } + $this->unitlist[] = $token->value; + $token->type = token::UNIT; + $seenunit = true; + continue; + } + + // Do various syntax checks for operators. + if ($token->type === token::OPERATOR) { + // We can only accept an operator if there has been at least one unit before. + if (!$seenunit) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + // The only operators allowed are exponentiation, multiplication, division and the unary minus. + // Note that the caret (^) always means exponentiation in the context of units. + if (!in_array($token->value, ['^', '**', '/', '*', '-'])) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + // The unary minus is only allowed inside an exponent. + if ($token->value === '-' && !$inexponent) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + // Only the unary minus is allowed inside an exponent. + if ($inexponent && $token->value !== '-') { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + if ($token->value === '^' || $token->value === '**') { + $inexponent = true; + } + if ($token->value === '/') { + if ($seenslash) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + $seenslash = true; + } + continue; + } + + // Numbers can only be used as exponents and exponents must always be integers. + if ($token->type === token::NUMBER) { + if (!$inexponent) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + if (intval($token->value) != $token->value) { + $this->die(get_string('error_integerexpected', 'qtype_formulas', $token->value), $token); + } + // Only one number is allowed in an exponent, so after the number the + // exponent must be finished. + $inexponent = false; + continue; + } + + // Parentheses are allowed, but we don't have to do anything with them now. + if (in_array($token->type, [token::OPENING_PAREN, token::CLOSING_PAREN])) { + continue; + } + + // All other tokens are not allowed. + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + + // The last token must be a number, a unit or a closing parenthesis. + $finaltoken = end($tokenlist); + if (!in_array($finaltoken->type, [token::UNIT, token::NUMBER, token::CLOSING_PAREN])) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + + $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist); + } + + /** + * Check whether a given unit has already been used. + * + * @param token $token token containing the unit + * @return bool + */ + protected function has_unit_been_used(token $token): bool { + return in_array($token->value, $this->unitlist); + } + + /** + * Check whether all parentheses are balanced and whether only round parens are used. + * Otherweise, stop all further processing and output an error message. + * + * @return void + */ + protected function check_parens(): void { + $parenstack = []; + foreach ($this->tokenlist as $token) { + $type = $token->type; + // We only allow round parens. + if (($token->type & token::ANY_PAREN) && !($token->type & token::OPEN_OR_CLOSE_PAREN)) { + $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } + if ($type === token::OPENING_PAREN) { + $parenstack[] = $token; + } + if ($type === token::CLOSING_PAREN) { + $top = end($parenstack); + // If stack is empty, we have a stray closing paren. + if (!($top instanceof token)) { + $this->die(get_string('error_strayparen', 'qtype_formulas', $token->value), $token); + } + array_pop($parenstack); + } + } + // If the stack of parentheses is not empty now, we have an unmatched opening parenthesis. + if (!empty($parenstack)) { + $unmatched = end($parenstack); + $this->die(get_string('error_parennotclosed', 'qtype_formulas', $unmatched->value), $unmatched); + } + } + + /** + * Translate the given input into a string that can be understood by the legacy unit parser, i. e. + * following all syntax rules. This allows keeping the old unit conversion system in place until + * we are readyd to eventually replace it. + * + * @return string + */ + public function get_legacy_unit_string(): string { + $stack = []; + + foreach ($this->statements[0] as $token) { + // Write numbers and units to the stack. + if (in_array($token->type, [token::UNIT, token::NUMBER])) { + $value = $token->value; + if (is_numeric($value) && $value < 0) { + $value = '(' . strval($value) . ')'; + } + $stack[] = $value; + } + + // Operators take arguments from stack and stick them together in the appropriate way. + if ($token->type === token::OPERATOR) { + $op = $token->value; + if ($op === '**') { + $op = '^'; + } + if ($op === '*') { + $op = ' '; + } + $second = array_pop($stack); + $first = array_pop($stack); + // With the new syntax, it is possible to write e.g. (m/s^2)*kg. In older versions, + // everything coming after the / operator will be considered a part of the denominator, + // so the only way to get the kg into the numerator is to reorder the units and + // write them as kg*m/s^2. Long story short: if there is a division, it must come last. + // Note that the syntax currently does not allow more than one /, so we do not need + // a more sophisticated solution. + if (strpos($first, '/') !== false) { + list($second, $first) = [$first, $second]; + } + // Legacy syntax allowed parens around the entire denominator, so we do that unless the + // denominator is just one unit. + if ($op === '/' && !preg_match('/^[A-Za-z]+$/', $second)) { + $second = '(' . $second . ')'; + } + $stack[] = $first . $op . $second; + } + } + + return implode('', $stack); + } +} diff --git a/lang/en/qtype_formulas.php b/lang/en/qtype_formulas.php index abb1f57d..9375cbb0 100644 --- a/lang/en/qtype_formulas.php +++ b/lang/en/qtype_formulas.php @@ -165,6 +165,7 @@ $string['error_import_missing_field'] = 'Import error. Missing field: {$a} '; $string['error_in_answer'] = 'Error in answer #{$a->answerno}: {$a->message}'; $string['error_indexoutofrange'] = 'Evaluation error: index {$a} out of range.'; +$string['error_integerexpected'] = 'Syntax error: integer expected, found {$a} instead.'; $string['error_inv_consec'] = 'When using inv(), the numbers in the list must be consecutive.'; $string['error_inv_integers'] = 'inv() expects all elements of the list to be integers; floats will be truncated.'; $string['error_inv_list'] = 'inv() expects a list.'; diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php new file mode 100644 index 00000000..cbc09031 --- /dev/null +++ b/tests/unit_parser_test.php @@ -0,0 +1,176 @@ +. + +namespace qtype_formulas; + +defined('MOODLE_INTERNAL') || die(); + +global $CFG; +require_once($CFG->dirroot . '/question/type/formulas/questiontype.php'); + +use Exception; +use qtype_formulas\local\unit_parser; + +/** + * Unit tests for the unit_parser class. + * + * @package qtype_formulas + * @category test + * @copyright 2025 Philipp Imhof + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + * + * @covers \qtype_formulas\local\unit_parser + */ +final class unit_parser_test extends \advanced_testcase { + + public function test_parse_unit_FIXME_REMOVE_WHEN_FINISHED() { + self::assertTrue(true); + return; + $input = '(m/s)^2'; + $parser = new unit_parser($input); + var_dump($parser->get_statements()[0]); + + echo $parser->get_legacy_unit_string(); + } + + /** + * Test parsing of unit inputs. + * + * @dataProvider provide_units + */ + public function test_parse_unit($expected, $input) { + $e = null; + $error = ''; + try { + new unit_parser($input); + } catch (Exception $e) { + $error = $e->getMessage(); + } + + // If we are expecting an error message, the exception object should not be null and + // the message should match, without checking row and column number. + if ($expected[0] === '!') { + self::assertNotNull($e); + self::assertStringEndsWith(substr($expected, 1), $error); + } else { + self::assertNull($e); + } + } + + /** + * Test conversion of unit inputs to legacy input format. + * + * @dataProvider provide_units + */ + public function test_get_legacy_unit_string($expected, $input) { + $e = null; + try { + $parser = new unit_parser($input); + } catch (Exception $e) { + $e->getMessage(); + } + + // If we are not expecting an error, check that the input has been translated as expected. + if ($expected[0] !== '!') { + self::assertEquals($expected, $parser->get_legacy_unit_string()); + } else { + self::assertNotNull($e); + } + } + + /** + * Data provider for the test functions. For simplicity, we use the same provider + * for valid and invalid expressions. In case of invalid expressions, we put an + * exclamation mark (!) at the start of the error message. + * + * @return array + */ + public static function provide_units(): array { + return [ + ['J/(m K)', 'J / m K'], + ['J/(m K)', 'J / m*K'], + ['J/(m K)', 'J / (m K)'], + ['J/(m K)', 'J / (m*K)'], + ['m kg/(s^2)', 'm kg/s^2'], + ['m kg/(s^2)', 'm kg / s^2'], + ['m kg/(s^2)', 'm*kg / s^2'], + ['m kg/(s^2)', 'm*(kg / s^2)'], + ['kg m/(s^2)', '(m/s^2)*kg'], + ['kg m/(s^2)', '(m/s^2) kg'], + ['m kg/(s^2)', '(m (kg / s^(2)))'], + ['m K kg/s', 'm (kg / s) K'], + ['s^(-1)', 's^-1'], + ['s^2', 's**2'], + ['s^(-1)', 's**-1'], + ['s^(-1)', 's^(-1)'], + ['s^(-1)', 's**(-1)'], + ['s^(-1)/(m^(-1))', 's**-1 / m**-1'], + ['m', 'm'], + ['m', '(m)'], + ['km', 'km'], + ['m^2', 'm^2'], + ['m^2', 'm^(2)'], + ['m^2', '(m^2)'], + ['m^2', 'm**2'], + ['m^2', '(m**2)'], + ['m^2', 'm**(2)'], + ['m^2', 'm ^ 2'], + ['m^2', 'm ^ (2)'], + ['m^2', 'm ** 2'], + ['m^2', 'm ** (2)'], + ['m^(-2)', 'm^-2'], + ['m^(-2)', '(m^-2)'], + ['m^(-2)', 'm^(-2)'], + ['m^(-2)', 'm ^ -2'], + ['m^(-2)', 'm ^ (-2)'], + ['m/s', 'm/s'], + ['m/s', '(m)/(s)'], + ['m/s', '(m/s)'], + ['m s^(-1)', 'm s^-1'], + ['m s^(-1)', 'm (s^-1)'], + ['m s^(-1)', 'm (s^(-1))'], + ['m s^(-1)', 'm s^(-1)'], + ['m/(s^(-1))', 'm / (s^(-1))'], + ['m/(s^(-1))', 'm / ((s^(-1)))'], + ['kg m/s', 'kg m/s'], + ['kg m/s', 'kg (m/s)'], + ['kg m/s', 'kg*(m/s)'], + ['kg m/s', 'kg*m/s'], + ['kg m/s', '(kg m)/s'], + ['kg m/s', '(kg*m)/s'], + ['kg m s^(-1)', 'kg m s^-1'], + ['kg m^2', 'kg m^2'], + ['kg m^2', 'kg m ^ 2'], + ['kg m s^(-1)', 'kg m s ^ - 1'], + ['!Unit already used: m', 'm kg / m'], + ['!Unexpected token: 1', 'm 1/s'], + ['!Unexpected token: 1', '1/s'], + ['!Unexpected token: 1', '1 m/s'], + ['!Unexpected token: 2', '2/s'], + ['!Unexpected token: 2.1', '2.1'], + ['!Unexpected token: ^', '^2'], + ['!Unexpected token: *', '*s'], + ['!Unexpected token: *', 'm* *kg'], + ['!Unexpected token: /', '/s'], + ['!Unexpected token: *', 'm*'], + ['!Unexpected token: /', 'm/'], + ['!Unexpected token: ^', 'm^'], + ['!Unexpected token: /', 'm^(/2)'], + ['!Unexpected token: +', 'm^+2'], + ["!Unexpected input: '@'", '@'], + ]; + } +} diff --git a/version.php b/version.php index 7d5ff3a9..d55c6d91 100644 --- a/version.php +++ b/version.php @@ -25,7 +25,7 @@ defined('MOODLE_INTERNAL') || die(); $plugin->component = 'qtype_formulas'; -$plugin->version = 2025021400; +$plugin->version = 2025021402; $plugin->cron = 0; $plugin->requires = 2022112800; From ec8e47ff36c27455d0d3c716c70e21bd74f6d787 Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Sun, 9 Mar 2025 10:06:41 +0100 Subject: [PATCH 2/8] steps to parse unit conversion rules --- classes/local/shunting_yard.php | 6 ++ classes/local/unit_parser.php | 154 ++++++++++++++++++++++++++++++-- tests/unit_parser_test.php | 25 ++++++ 3 files changed, 179 insertions(+), 6 deletions(-) diff --git a/classes/local/shunting_yard.php b/classes/local/shunting_yard.php index 2ffd79dc..03a8855c 100644 --- a/classes/local/shunting_yard.php +++ b/classes/local/shunting_yard.php @@ -622,6 +622,12 @@ public static function unit_infix_to_rpn($tokens): array { if ($value === '^') { $value = '**'; } + // Exponents cannot follow a closing parenthesis, because things like (m/s)^2 cannot + // be translated to legacy syntax before "unit arithmetic" is fully implemented. We use + // $token->value instead of $value to have the operator like it was entered. + if ($value === '**' && $lasttype === token::CLOSING_PAREN) { + self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); + } $thisprecedence = self::get_precedence($value); // We artificially increase the precedence of the division operator, because // legacy versions used implicit parens around the denominator, e. g. diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php index 0b170817..7435f261 100644 --- a/classes/local/unit_parser.php +++ b/classes/local/unit_parser.php @@ -39,6 +39,34 @@ - if * is used after /, assume implicit parens, e. g. J / m * K --> J / (m * K) - do not allow operators other than *, / and ^ as well as unary - (in exponents only) - allow ** instead of ^ +- for the moment: disallow exponent after closing paren to avoid things like (m/s)^2 + + +Syntax for unit conversion rules + +Type 1: SI prefixes + + : ... ; + +- at least one valid prefix +- ; at end, if more statements follow +- base unit single token, no number + +example: + +m : k da d c m u µ; +s : m u µ; + +Type 2: conversion factors / arbitrary prefixes + +[] = [ = ...] ; + +- if first number not given --> 1 +- all further declarations always relative to base unit + +min = 60 s; +h = 60 min; + */ @@ -52,9 +80,77 @@ */ class unit_parser extends parser { + // FIXME: allow unicode chars for micro and ohm in lexer; maybe disallow for variable names (check during assignment) + + const SI_PREFIX_FACTORS = [ + 'd' => 0.1, + 'c' => 0.01, + 'm' => 0.001, + 'u' => 1e-6, + // For convenience, we also allow U+00B5 MICRO SIGN. + "\u{00B5}" => 1e-6, + // For convenience, we also allow U+03BC GREEK SMALL LETTER MU. + "\u{03BC}" => 1e-6, + 'n' => 1e-9, + 'p' => 1e-12, + 'f' => 1e-15, + 'a' => 1e-18, + 'z' => 1e-21, + 'y' => 1e-24, + 'r' => 1e-27, + 'q' => 1e-30, + 'da' => 10, + 'h' => 100, + 'k' => 1000, + 'M' => 1e6, + 'G' => 1e9, + 'T' => 1e12, + 'P' => 1e15, + 'E' => 1e18, + 'Z' => 1e21, + 'Y' => 1e24, + 'R' => 1e27, + 'Q' => 1e30, + ]; + + const DEFAULT_PREFIXES = [ + 's' => ['m', 'u', 'n', 'p', 'f'], + 'm' => ['k', 'da', 'c', 'd', 'm', 'u', 'n', 'p', 'f'], + 'g' => ['k', 'm', 'u', 'n', 'p', 'f'], + 'A' => ['m', 'u', 'n', 'p', 'f'], + 'mol' => ['m', 'u', 'n', 'p'], + 'K' => ['m', 'u', 'n', 'k', 'M'], + 'cd' => ['m', 'k', 'M', 'u', 'G'], + 'N' => ['M', 'k', 'm', 'u', 'n', 'p', 'f'], + 'J' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'], + 'eV' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'], + 'W' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'], + 'Pa' => ['M', 'G', 'T', 'P', 'k', 'h'], + 'Hz' => ['M', 'G', 'T', 'P', 'E', 'k'], + 'C' => ['k', 'm', 'u', 'n', 'p', 'f'], + 'V' => ['M', 'G', 'k', 'm', 'u', 'n', 'p', 'f'], + 'ohm' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'], + 'F' => ['m', 'u', 'n', 'p', 'f'], + 'T' => ['k', 'm', 'u', 'n', 'p'], + 'H' => ['k', 'm', 'u', 'n', 'p'], + ]; + + const DEFAULT_SPECIAL_RULES = [ + // For convenience, we also allow U+2126 OHM SIGN. + "\u{2126}" => ['ohm' => 1], + // For convenience, we also allow U+03A9 GREEK CAPITAL LETTER OMEGA. + "\u{03A9}" => ['ohm' => 1], + 'min' => ['s' => 60], + 'h' => ['s' => 3600], + 'J' => ['eV' => 6.24150947e+18], + ]; + /** @var array list of used units */ private array $unitlist = []; + /** @var string list of all units with their prefixes, allowing to find base units quickly */ + private string $baseunitmap = ''; + /** * Create a unit parser class and have it parse a given input. The input can be given as a string, in * which case it will first be sent to the lexer. If that step has already been made, the constructor @@ -69,16 +165,29 @@ public function __construct($tokenlist) { $tokenlist = $lexer->get_tokens(); } $this->tokenlist = $tokenlist; - $this->count = count($tokenlist); // Check for unbalanced / mismatched parentheses. $this->check_parens(); - // Whether we have already seen a slash or the number one (except in exponents). + // Perform basic syntax check, including classification of IDENTIFIER tokens + // to UNIT tokens. + $this->check_syntax(); + + // Run the tokens through an adapted shunting yard algorithm to bring them into + // RPN notation. + $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist); + + // Build base unit map that will be used to find the base unit for a given unit, + // e. g. find s from ms or Pa from hPa. + $this->build_base_unit_map(); + } + + protected function check_syntax(): void { + // Whether we have already seen a slash or a unit and whether we are in an exponent. $seenslash = false; $seenunit = false; $inexponent = false; - foreach ($tokenlist as $token) { + foreach ($this->tokenlist as $token) { // The use of functions is not permitted in units, so all identifiers will be classified // as UNIT tokens. if ($token->type === token::IDENTIFIER) { @@ -96,7 +205,8 @@ public function __construct($tokenlist) { continue; } - // Do various syntax checks for operators. + // Do various syntax checks for operators. We do them separately in order to allow + // for more specific error messages, if needed. if ($token->type === token::OPERATOR) { // We can only accept an operator if there has been at least one unit before. if (!$seenunit) { @@ -151,12 +261,44 @@ public function __construct($tokenlist) { } // The last token must be a number, a unit or a closing parenthesis. - $finaltoken = end($tokenlist); + $finaltoken = end($this->tokenlist); if (!in_array($finaltoken->type, [token::UNIT, token::NUMBER, token::CLOSING_PAREN])) { $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token); } + } - $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist); + protected function build_base_unit_map(): void { + // First, we add all the built-in default prefix rules. + foreach (self::DEFAULT_PREFIXES as $base => $prefixes) { + foreach ($prefixes as $prefix) { + $this->baseunitmap .= '|' . $prefix . $base . ':' . $base; + } + } + + // Next, the built-in special prefix rules. + + + // Finally, we add user-defined rules. + } + + protected function find_base_unit(string $unit): string { + // Example, must be built from config / rules. Format "pipe - unit with prefix - colon - base unit". + // Our definitions first, user's definition last. + $map = '|s:s|ms:s|us:s|µs:s|cm:m|dm:m|hPa:Pa|kg:g'; + + $matches = []; + preg_match_all('/\|' . $unit . ':([^|]+)/', $map, $matches); + + // Array $matches has two entries, $matches[0] are full pattern matches (e.g. '|ms:s') and + // $matches[1] are matches of base units. If there are no matches at all, the unit was not found. + // This cannot normally happen. If it does, we return the unit as-is. + if (count($matches[1]) === 0) { + return $unit; + } + + // In all other cases, we return the last possible match. In most cases there will be only one, + // but if there are more than one, the user-defined unit should be taken. + return end($matches[1]); } /** diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php index cbc09031..2820d0a8 100644 --- a/tests/unit_parser_test.php +++ b/tests/unit_parser_test.php @@ -22,6 +22,8 @@ require_once($CFG->dirroot . '/question/type/formulas/questiontype.php'); use Exception; +use qtype_formulas\local\lexer; +use qtype_formulas\local\token; use qtype_formulas\local\unit_parser; /** @@ -33,6 +35,7 @@ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later * * @covers \qtype_formulas\local\unit_parser + * @covers \qtype_formulas\local\shunting_yard */ final class unit_parser_test extends \advanced_testcase { @@ -46,6 +49,26 @@ public function test_parse_unit_FIXME_REMOVE_WHEN_FINISHED() { echo $parser->get_legacy_unit_string(); } + public function test_parse_unit_rules_FIXME() { + $input = 'm: k da d c m u; s: m u; min = 60 s;'; + + + + $lexer = new lexer($input); + $tokens = $lexer->get_tokens(); + var_dump($tokens); + + $rules = []; + foreach ($tokens as $token) { + + + if ($token->type === token::END_OF_STATEMENT) { + $rules[] = $currentrule; + } + } + + } + /** * Test parsing of unit inputs. * @@ -170,6 +193,8 @@ public static function provide_units(): array { ['!Unexpected token: ^', 'm^'], ['!Unexpected token: /', 'm^(/2)'], ['!Unexpected token: +', 'm^+2'], + ['!Unexpected token: ^', '(m/s)^2'], + ['!Unexpected token: **', '(m/s)**2'], ["!Unexpected input: '@'", '@'], ]; } From 9ff673af49fe60f7c15f858658ee279fcd105f6b Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Sun, 13 Apr 2025 10:57:59 +0200 Subject: [PATCH 3/8] prepare compatibility layer for units --- classes/local/unit_parser.php | 173 ++-------------------------------- tests/unit_parser_test.php | 32 ------- 2 files changed, 10 insertions(+), 195 deletions(-) diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php index 7435f261..97feb22b 100644 --- a/classes/local/unit_parser.php +++ b/classes/local/unit_parser.php @@ -16,61 +16,6 @@ namespace qtype_formulas\local; -/* - -Notes about current implementation: - -- only 2 operators allowed: ^ for exponentiation and / for division -- only one / allowed -- no * allowed -- no parens allowed, except in exponent or around the *entire* denominator -- right side of / is considered in parens, even if not written, e.g. J/m*K --> J / (m*K) -- only units, no numbers except for exponents -- positive or negative exponents allowed -- negative exponents allowed with or without parens -- same unit not allowed more than once - -Future implementation, must be 100% backwards compatible - -- allow parens everywhere -- allow * for explicit multiplication of units -- still only allow one / -- still not allow same unit more than once -- if * is used after /, assume implicit parens, e. g. J / m * K --> J / (m * K) -- do not allow operators other than *, / and ^ as well as unary - (in exponents only) -- allow ** instead of ^ -- for the moment: disallow exponent after closing paren to avoid things like (m/s)^2 - - -Syntax for unit conversion rules - -Type 1: SI prefixes - - : ... ; - -- at least one valid prefix -- ; at end, if more statements follow -- base unit single token, no number - -example: - -m : k da d c m u µ; -s : m u µ; - -Type 2: conversion factors / arbitrary prefixes - -[] = [ = ...] ; - -- if first number not given --> 1 -- all further declarations always relative to base unit - -min = 60 s; -h = 60 min; - - -*/ - - /** * Parser for units for qtype_formulas * @@ -80,77 +25,9 @@ */ class unit_parser extends parser { - // FIXME: allow unicode chars for micro and ohm in lexer; maybe disallow for variable names (check during assignment) - - const SI_PREFIX_FACTORS = [ - 'd' => 0.1, - 'c' => 0.01, - 'm' => 0.001, - 'u' => 1e-6, - // For convenience, we also allow U+00B5 MICRO SIGN. - "\u{00B5}" => 1e-6, - // For convenience, we also allow U+03BC GREEK SMALL LETTER MU. - "\u{03BC}" => 1e-6, - 'n' => 1e-9, - 'p' => 1e-12, - 'f' => 1e-15, - 'a' => 1e-18, - 'z' => 1e-21, - 'y' => 1e-24, - 'r' => 1e-27, - 'q' => 1e-30, - 'da' => 10, - 'h' => 100, - 'k' => 1000, - 'M' => 1e6, - 'G' => 1e9, - 'T' => 1e12, - 'P' => 1e15, - 'E' => 1e18, - 'Z' => 1e21, - 'Y' => 1e24, - 'R' => 1e27, - 'Q' => 1e30, - ]; - - const DEFAULT_PREFIXES = [ - 's' => ['m', 'u', 'n', 'p', 'f'], - 'm' => ['k', 'da', 'c', 'd', 'm', 'u', 'n', 'p', 'f'], - 'g' => ['k', 'm', 'u', 'n', 'p', 'f'], - 'A' => ['m', 'u', 'n', 'p', 'f'], - 'mol' => ['m', 'u', 'n', 'p'], - 'K' => ['m', 'u', 'n', 'k', 'M'], - 'cd' => ['m', 'k', 'M', 'u', 'G'], - 'N' => ['M', 'k', 'm', 'u', 'n', 'p', 'f'], - 'J' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'], - 'eV' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'], - 'W' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'], - 'Pa' => ['M', 'G', 'T', 'P', 'k', 'h'], - 'Hz' => ['M', 'G', 'T', 'P', 'E', 'k'], - 'C' => ['k', 'm', 'u', 'n', 'p', 'f'], - 'V' => ['M', 'G', 'k', 'm', 'u', 'n', 'p', 'f'], - 'ohm' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'], - 'F' => ['m', 'u', 'n', 'p', 'f'], - 'T' => ['k', 'm', 'u', 'n', 'p'], - 'H' => ['k', 'm', 'u', 'n', 'p'], - ]; - - const DEFAULT_SPECIAL_RULES = [ - // For convenience, we also allow U+2126 OHM SIGN. - "\u{2126}" => ['ohm' => 1], - // For convenience, we also allow U+03A9 GREEK CAPITAL LETTER OMEGA. - "\u{03A9}" => ['ohm' => 1], - 'min' => ['s' => 60], - 'h' => ['s' => 3600], - 'J' => ['eV' => 6.24150947e+18], - ]; - /** @var array list of used units */ private array $unitlist = []; - /** @var string list of all units with their prefixes, allowing to find base units quickly */ - private string $baseunitmap = ''; - /** * Create a unit parser class and have it parse a given input. The input can be given as a string, in * which case it will first be sent to the lexer. If that step has already been made, the constructor @@ -176,12 +53,14 @@ public function __construct($tokenlist) { // Run the tokens through an adapted shunting yard algorithm to bring them into // RPN notation. $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist); - - // Build base unit map that will be used to find the base unit for a given unit, - // e. g. find s from ms or Pa from hPa. - $this->build_base_unit_map(); } + /** + * Check if the unit expression respects all syntax constraints, i. e. only valid operators (division, + * multiplication, exponentiation), valid use of parentheses, only one division operator. We only allow + * stuff that can be converted into the legacy syntax, because we still rely on the original unit + * conversion code for now. * + */ protected function check_syntax(): void { // Whether we have already seen a slash or a unit and whether we are in an exponent. $seenslash = false; @@ -267,42 +146,10 @@ protected function check_syntax(): void { } } - protected function build_base_unit_map(): void { - // First, we add all the built-in default prefix rules. - foreach (self::DEFAULT_PREFIXES as $base => $prefixes) { - foreach ($prefixes as $prefix) { - $this->baseunitmap .= '|' . $prefix . $base . ':' . $base; - } - } - - // Next, the built-in special prefix rules. - - - // Finally, we add user-defined rules. - } - - protected function find_base_unit(string $unit): string { - // Example, must be built from config / rules. Format "pipe - unit with prefix - colon - base unit". - // Our definitions first, user's definition last. - $map = '|s:s|ms:s|us:s|µs:s|cm:m|dm:m|hPa:Pa|kg:g'; - - $matches = []; - preg_match_all('/\|' . $unit . ':([^|]+)/', $map, $matches); - - // Array $matches has two entries, $matches[0] are full pattern matches (e.g. '|ms:s') and - // $matches[1] are matches of base units. If there are no matches at all, the unit was not found. - // This cannot normally happen. If it does, we return the unit as-is. - if (count($matches[1]) === 0) { - return $unit; - } - - // In all other cases, we return the last possible match. In most cases there will be only one, - // but if there are more than one, the user-defined unit should be taken. - return end($matches[1]); - } - /** - * Check whether a given unit has already been used. + * Check whether a given unit has already been used. As we currently still use the original code for + * unit conversion stuff, we stick to the same behaviour. Hence, we do not take into account the prefixes, + * i. e. km and m will be considered as different units. * * @param token $token token containing the unit * @return bool @@ -347,7 +194,7 @@ protected function check_parens(): void { /** * Translate the given input into a string that can be understood by the legacy unit parser, i. e. * following all syntax rules. This allows keeping the old unit conversion system in place until - * we are readyd to eventually replace it. + * we are ready to eventually replace it. * * @return string */ diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php index 2820d0a8..ab3b9b45 100644 --- a/tests/unit_parser_test.php +++ b/tests/unit_parser_test.php @@ -22,8 +22,6 @@ require_once($CFG->dirroot . '/question/type/formulas/questiontype.php'); use Exception; -use qtype_formulas\local\lexer; -use qtype_formulas\local\token; use qtype_formulas\local\unit_parser; /** @@ -39,36 +37,6 @@ */ final class unit_parser_test extends \advanced_testcase { - public function test_parse_unit_FIXME_REMOVE_WHEN_FINISHED() { - self::assertTrue(true); - return; - $input = '(m/s)^2'; - $parser = new unit_parser($input); - var_dump($parser->get_statements()[0]); - - echo $parser->get_legacy_unit_string(); - } - - public function test_parse_unit_rules_FIXME() { - $input = 'm: k da d c m u; s: m u; min = 60 s;'; - - - - $lexer = new lexer($input); - $tokens = $lexer->get_tokens(); - var_dump($tokens); - - $rules = []; - foreach ($tokens as $token) { - - - if ($token->type === token::END_OF_STATEMENT) { - $rules[] = $currentrule; - } - } - - } - /** * Test parsing of unit inputs. * From 0c5c72234a69d5fb9955c05e065ce72da23ec780 Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Sun, 13 Apr 2025 11:24:01 +0200 Subject: [PATCH 4/8] don't change version --- version.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version.php b/version.php index d55c6d91..7d5ff3a9 100644 --- a/version.php +++ b/version.php @@ -25,7 +25,7 @@ defined('MOODLE_INTERNAL') || die(); $plugin->component = 'qtype_formulas'; -$plugin->version = 2025021402; +$plugin->version = 2025021400; $plugin->cron = 0; $plugin->requires = 2022112800; From d912860399f129065156fe3f59a75222c18b80b2 Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Sun, 13 Apr 2025 15:04:44 +0200 Subject: [PATCH 5/8] added tests, cleanup --- classes/local/shunting_yard.php | 9 --------- tests/unit_parser_test.php | 28 ++++++++++++++++++++++++---- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/classes/local/shunting_yard.php b/classes/local/shunting_yard.php index 03a8855c..e4324dd8 100644 --- a/classes/local/shunting_yard.php +++ b/classes/local/shunting_yard.php @@ -609,10 +609,6 @@ public static function unit_infix_to_rpn($tokens): array { // Deal with all the possible operators... case token::OPERATOR: - // Expressions must not start with an operator. - if (is_null($lasttoken)) { - self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); - } // Operators must not follow an opening parenthesis, except for the unary minus. if ($lasttype === token::OPENING_PAREN && $value !== '-') { self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); @@ -646,11 +642,6 @@ public static function unit_infix_to_rpn($tokens): array { // Put the operator on the stack. $opstack[] = $token; break; - - // If we still haven't dealt with the token, there must be a problem with the input. - default: - self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token); - } $lasttoken = $token; diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php index ab3b9b45..15150729 100644 --- a/tests/unit_parser_test.php +++ b/tests/unit_parser_test.php @@ -42,7 +42,7 @@ final class unit_parser_test extends \advanced_testcase { * * @dataProvider provide_units */ - public function test_parse_unit($expected, $input) { + public function test_parse_unit($expected, $input): void { $e = null; $error = ''; try { @@ -66,7 +66,7 @@ public function test_parse_unit($expected, $input) { * * @dataProvider provide_units */ - public function test_get_legacy_unit_string($expected, $input) { + public function test_get_legacy_unit_string($expected, $input): void { $e = null; try { $parser = new unit_parser($input); @@ -146,7 +146,27 @@ public static function provide_units(): array { ['kg m^2', 'kg m^2'], ['kg m^2', 'kg m ^ 2'], ['kg m s^(-1)', 'kg m s ^ - 1'], - ['!Unit already used: m', 'm kg / m'], + ['!Syntax error: integer expected, found 2.5 instead.', 'm^2.5'], + ["!Unbalanced parenthesis, stray ')' found.", 'm/s)'], + ["!Unbalanced parenthesis, '(' is never closed.", '(m/s'], + ["!Unexpected input: '@'", '@'], + ['!Unexpected token: s', 'm^s'], + ['!Unexpected token: -', 'm*-s'], + ['!Unexpected token: /', 'm/s/K'], + ['!Unexpected token: /', 'm/s * kg/m^2'], + ['!Unexpected token: π', 'm*π'], + ['!Unexpected token: ,', 'm,s'], + ['!Unexpected token: [', '[m/s]'], + ['!Unexpected token: )', '(m*)'], + ['!Unexpected token: +', 'm+km'], + ['!Unexpected token: *', '*'], + ['!Unexpected token: *', '*m'], + ['!Unexpected token: /', '/m'], + ['!Unexpected token: *', 'm*(*s)'], + ['!Unexpected token: *', '(*m)'], + ['!Unexpected token: **', '(**m)'], + ['!Unexpected token: ^', '(^m)'], + ['!Unexpected token: {', '{m/s}'], ['!Unexpected token: 1', 'm 1/s'], ['!Unexpected token: 1', '1/s'], ['!Unexpected token: 1', '1 m/s'], @@ -163,7 +183,7 @@ public static function provide_units(): array { ['!Unexpected token: +', 'm^+2'], ['!Unexpected token: ^', '(m/s)^2'], ['!Unexpected token: **', '(m/s)**2'], - ["!Unexpected input: '@'", '@'], + ['!Unit already used: m', 'm kg / m'], ]; } } From 1afc557608a08a127d499a1230db5e122c332a05 Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Tue, 15 Apr 2025 09:39:04 +0200 Subject: [PATCH 6/8] include new unit parser in grading and validation --- question.php | 18 ++++++++++++++++-- questiontype.php | 18 ++++++++++++++++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/question.php b/question.php index b05a05b2..23997955 100644 --- a/question.php +++ b/question.php @@ -36,6 +36,7 @@ use qtype_formulas\local\random_parser; use qtype_formulas\local\parser; use qtype_formulas\local\token; +use qtype_formulas\local\unit_parser; use qtype_formulas\unit_conversion_rules; defined('MOODLE_INTERNAL') || die(); @@ -1481,7 +1482,7 @@ public function grade(array $response, bool $finalsubmit = false): array { // formulas, we must wrap the answers in quotes before we move on. Also, we reset the conversion // factor, because it is not needed for algebraic answers. if ($isalgebraic) { - $response = self::wrap_algebraic_formulas_in_quotes($response); + $response = self::wrap_algebraic_formulas_in_quotes($response); $conversionfactor = 1; } @@ -1572,7 +1573,20 @@ private function is_compatible_unit(string $studentsunit) { $checkunit->assign_default_rules($this->ruleid, $entry[1]); $checkunit->assign_additional_rules($this->otherrule); - $checked = $checkunit->check_convertibility($studentsunit, $this->postunit); + // Use the compatibility layer to parse the unit string and convert it into the old format. + // If parsing fails, we can immediately return false. + try { + $parser = new unit_parser($studentsunit); + $postunitparser = new unit_parser($this->postunit); + } catch (Exception $e) { + // TODO: convert to non-capturing catch + return false; + } + + $checked = $checkunit->check_convertibility( + $parser->get_legacy_unit_string(), + $postunitparser->get_legacy_unit_string(), + ); if ($checked->convertible) { return $checked->cfactor; } diff --git a/questiontype.php b/questiontype.php index 4d26907e..d8de20f5 100644 --- a/questiontype.php +++ b/questiontype.php @@ -31,6 +31,7 @@ use qtype_formulas\local\answer_parser; use qtype_formulas\local\parser; use qtype_formulas\local\token; +use qtype_formulas\local\unit_parser; defined('MOODLE_INTERNAL') || die(); @@ -1228,9 +1229,22 @@ public function check_variables_and_expressions(object $data, array $parts): obj // If a unit has been provided, check whether it can be parsed. if (!empty($part->postunit)) { try { - $unitcheck->parse_targets($part->postunit); + $unitparser = new unit_parser($part->postunit); + $unitcheck->parse_targets($unitparser->get_legacy_unit_string()); } catch (Exception $e) { - $errors["postunit[$i]"] = get_string('error_unit', 'qtype_formulas'); + $trace = $e->getTraceAsString(); + // If we are coming from the newer code, use the detailed error message without + // the row/column number. Otherwise just use the generic message provided by the + // legacy code. + // TODO: Use str_contains() once we drop support for PHP < 8.0. + if (strstr($trace, 'unit_parser.php') !== false) { + // The error message may contain line and column numbers, but they don't make + // sense in this context, so we'd rather remove them. + $errors["postunit[$i]"] = preg_replace('/([^:]+:)([^:]+:)/', '', $e->getMessage()); + + } else { + $errors["postunit[$i]"] = get_string('error_unit', 'qtype_formulas'); + } } } From fa879718bd9b3ba4a83c56d4f0d46d622b8596db Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Tue, 15 Apr 2025 10:17:51 +0200 Subject: [PATCH 7/8] deal with empty unit input --- classes/local/unit_parser.php | 7 +++++++ tests/questiontype_test.php | 12 ++++++++++-- tests/unit_parser_test.php | 5 +++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php index 97feb22b..165b7017 100644 --- a/classes/local/unit_parser.php +++ b/classes/local/unit_parser.php @@ -43,6 +43,13 @@ public function __construct($tokenlist) { } $this->tokenlist = $tokenlist; + // The unit_parser might have been called on an empty input. If this is the case, we store an + // empty statement and stop here. + if (empty($this->tokenlist)) { + $this->statements[] = []; + return; + } + // Check for unbalanced / mismatched parentheses. $this->check_parens(); diff --git a/tests/questiontype_test.php b/tests/questiontype_test.php index 9778eb35..9b993ca0 100644 --- a/tests/questiontype_test.php +++ b/tests/questiontype_test.php @@ -467,10 +467,18 @@ public static function provide_single_part_data_for_form_validation(): array { 'correctness' => [0 => '1/0'], ], ], + [[], ['postunit' => [0 => 'a/b*c']], + ], + [ + ['postunit[0]' => 'Unit already used: m'], + [ + 'postunit' => [0 => 'm*m'], + ], + ], [ - ['postunit[0]' => get_string('error_unit', 'qtype_formulas')], + ['postunit[0]' => 'Unexpected token: ^'], [ - 'postunit' => [0 => 'a/b*c'], + 'postunit' => [0 => '(m/s)^2'], ], ], [ diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php index 15150729..1488ab85 100644 --- a/tests/unit_parser_test.php +++ b/tests/unit_parser_test.php @@ -53,7 +53,7 @@ public function test_parse_unit($expected, $input): void { // If we are expecting an error message, the exception object should not be null and // the message should match, without checking row and column number. - if ($expected[0] === '!') { + if (!empty($expected) && $expected[0] === '!') { self::assertNotNull($e); self::assertStringEndsWith(substr($expected, 1), $error); } else { @@ -75,7 +75,7 @@ public function test_get_legacy_unit_string($expected, $input): void { } // If we are not expecting an error, check that the input has been translated as expected. - if ($expected[0] !== '!') { + if (empty($expected) || $expected[0] !== '!') { self::assertEquals($expected, $parser->get_legacy_unit_string()); } else { self::assertNotNull($e); @@ -91,6 +91,7 @@ public function test_get_legacy_unit_string($expected, $input): void { */ public static function provide_units(): array { return [ + ['', ''], ['J/(m K)', 'J / m K'], ['J/(m K)', 'J / m*K'], ['J/(m K)', 'J / (m K)'], From 95093d2e3675316a3531c6c04d37d1819ccebd75 Mon Sep 17 00:00:00 2001 From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com> Date: Tue, 15 Apr 2025 17:48:01 +0200 Subject: [PATCH 8/8] add test for grading with unit compatibility layer --- tests/question_test.php | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/question_test.php b/tests/question_test.php index 35242b4f..df1b01af 100644 --- a/tests/question_test.php +++ b/tests/question_test.php @@ -800,6 +800,32 @@ public function test_grade_parts_that_can_be_graded_6(): void { self::assertEquals($expected, $partscores); } + public function test_grade_with_unit_compatibility_layer(): void { + $q = $this->get_test_formulas_question('testmethodsinparts'); + + // We overwrite the postunit. In the first part, we use the old syntax for the model unit and + // the new syntax in the response. In the second part, we use the new syntax for the model + // unit and the old syntax in the response. + $q->parts[0]->postunit = 'm s'; + $q->parts[1]->postunit = 'm*s'; + + $q->start_attempt(new question_attempt_step(), 1); + + // phpcs:ignore Universal.Arrays.DuplicateArrayKey.Found + $response = ['0_' => '40 m*s', '1_0' => '40', '1_1' => 'm s', '2_0' => '40', '3_0' => '40']; + $partscores = $q->grade_parts_that_can_be_graded($response, [], false); + + // The latest $response is correct for all parts #0 and #2. Note that the penalty value will + // be set according to the question data; it does not mean that a deduction occurred. + $expected = [ + '0' => new qbehaviour_adaptivemultipart_part_result('0', 1, 0.3), + '1' => new qbehaviour_adaptivemultipart_part_result('1', 1, 0.3), + '2' => new qbehaviour_adaptivemultipart_part_result('2', 1, 0.3), + '3' => new qbehaviour_adaptivemultipart_part_result('3', 1, 0.3), + ]; + self::assertEquals($expected, $partscores); + } + public function test_get_parts_and_weights_singlenum(): void { $q = $this->get_test_formulas_question('testsinglenum');