From ca0efb775ddb0440bb066cb583e72f883ca10aa0 Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Sat, 1 Mar 2025 11:58:11 +0100
Subject: [PATCH 1/8] first step for unit parser
---
classes/local/parser.php | 6 +-
classes/local/shunting_yard.php | 127 ++++++++++++++++
classes/local/token.php | 3 +
classes/local/unit_parser.php | 256 ++++++++++++++++++++++++++++++++
lang/en/qtype_formulas.php | 1 +
tests/unit_parser_test.php | 176 ++++++++++++++++++++++
version.php | 2 +-
7 files changed, 567 insertions(+), 4 deletions(-)
create mode 100644 classes/local/unit_parser.php
create mode 100644 tests/unit_parser_test.php
diff --git a/classes/local/parser.php b/classes/local/parser.php
index c198b480..eec95421 100644
--- a/classes/local/parser.php
+++ b/classes/local/parser.php
@@ -33,7 +33,7 @@ class parser {
protected array $tokenlist;
/** @var int number of (raw) tokens */
- private int $count;
+ protected int $count;
/** @var int position w.r.t. list of (raw) tokens */
private int $position = -1;
@@ -98,7 +98,7 @@ private function parse_the_right_thing(token $token) {
*
* @return void
*/
- private function check_unbalanced_parens(): void {
+ protected function check_unbalanced_parens(): void {
$parenstack = [];
foreach ($this->tokenlist as $token) {
$type = $token->type;
@@ -399,7 +399,7 @@ private function peek(int $skip = 0): ?token {
*
* @return token|null
*/
- private function read_next(): ?token {
+ protected function read_next(): ?token {
$nexttoken = $this->peek();
if ($nexttoken !== self::EOF) {
$this->position++;
diff --git a/classes/local/shunting_yard.php b/classes/local/shunting_yard.php
index da309966..2ffd79dc 100644
--- a/classes/local/shunting_yard.php
+++ b/classes/local/shunting_yard.php
@@ -528,6 +528,133 @@ public static function infix_to_rpn(array $tokens): array {
return $output;
}
+ /**
+ * Translate unit expression from infix into RPN notation via Dijkstra's shunting yard algorithm,
+ * because this makes evaluation much easier.
+ *
+ * @param array $tokens the tokens forming the expression that is to be translated
+ * @return array
+ */
+ public static function unit_infix_to_rpn($tokens): array {
+ $output = [];
+ $opstack = [];
+
+ $lasttoken = null;
+ $lasttype = null;
+ $lastvalue = null;
+ foreach ($tokens as $token) {
+ $type = $token->type;
+ $value = $token->value;
+
+ if (!is_null($lasttoken)) {
+ $lasttype = $lasttoken->type;
+ $lastvalue = $lasttoken->value;
+ }
+
+ // Insert inplicit multiplication sign between two consecutive UNIT tokens.
+ // For accurate error reporting, the row and column number of the implicit
+ // multiplication token are copied over from the current token which triggered
+ // the multiplication.
+ $unitunit = ($lasttype === token::UNIT && $type === token::UNIT);
+ $unitparen = ($lasttype === token::UNIT && $type === token::OPENING_PAREN);
+ $parenunit = ($lasttype === token::CLOSING_PAREN && $type === token::UNIT);
+ $parenparen = ($lasttype === token::CLOSING_PAREN && $type === token::OPENING_PAREN);
+ if ($unitunit || $unitparen || $parenunit || $parenparen) {
+ // For backwards compatibility, division will have a lower precedence than multiplication,
+ // in order for J / m K to be interpreted as J / (m K). Instead of introducing a special
+ // 'unit multiplication' pseudo-operator, we simply increase the multiplication's precedence
+ // by one when flushing operators from the opstack.
+ self::flush_higher_precedence($opstack, self::get_precedence('*') + 1, $output);
+ $opstack[] = new token(token::OPERATOR, '*', $token->row, $token->column);
+ }
+
+ // Two consecutive operators are only possible if the unary minus follows exponentiation.
+ // Note: We do not have to check whether the first of them is exponentiation, because we
+ // only allow - in the exponent anyway.
+ if ($type === token::OPERATOR && $lasttype === token::OPERATOR && $value !== '-') {
+ self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
+ }
+
+ switch ($type) {
+ // UNIT tokens go straight to the output queue.
+ case token::UNIT:
+ $output[] = $token;
+ break;
+
+ // Numbers go to the output queue.
+ case token::NUMBER:
+ // If the last token was the unary minus, we multiply the number by -1 before
+ // sending it to the output queue. Afterwards, we can remove the minus from the opstack.
+ if ($lasttype === token::OPERATOR && $lastvalue === '-') {
+ $token->value = -$token->value;
+ array_pop($opstack);
+ }
+ $output[] = $token;
+ break;
+
+ // Opening parentheses go straight to the operator stack.
+ case token::OPENING_PAREN:
+ $opstack[] = $token;
+ break;
+
+ // A closing parenthesis means we flush all operators until we get to the
+ // matching opening parenthesis.
+ case token::CLOSING_PAREN:
+ // A closing parenthesis must not occur immediately after an operator.
+ if ($lasttype === token::OPERATOR) {
+ self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
+ }
+ self::flush_until_paren($opstack, token::OPENING_PAREN, $output);
+ break;
+
+ // Deal with all the possible operators...
+ case token::OPERATOR:
+ // Expressions must not start with an operator.
+ if (is_null($lasttoken)) {
+ self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
+ }
+ // Operators must not follow an opening parenthesis, except for the unary minus.
+ if ($lasttype === token::OPENING_PAREN && $value !== '-') {
+ self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
+ }
+ // Before fetching the precedence, we must translate ^ (caret) into **, because
+ // the ^ operator normally has a different meaning with lower precedence.
+ if ($value === '^') {
+ $value = '**';
+ }
+ $thisprecedence = self::get_precedence($value);
+ // We artificially increase the precedence of the division operator, because
+ // legacy versions used implicit parens around the denominator, e. g.
+ // the expression J / m K would be interpreted as J / (m * K). This is consistent
+ // with what tools like Wolfram Alpha do, even though e. g. 1 / 2 3 would be read
+ // as 3/2 both by Formulas Question and Wolfram Alpha. And even if it were not, it
+ // is not possible to change that, because it could break existing questions.
+ if ($value === '*') {
+ $thisprecedence++;
+ }
+ // Flush operators with higher precedence, unless we have a unary minus, because
+ // it is not left-associative.
+ if ($value !== '-') {
+ self::flush_higher_precedence($opstack, $thisprecedence, $output);
+ }
+ // Put the operator on the stack.
+ $opstack[] = $token;
+ break;
+
+ // If we still haven't dealt with the token, there must be a problem with the input.
+ default:
+ self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
+
+ }
+
+ $lasttoken = $token;
+ }
+ // After last token, flush opstack. Last token must be either a number (in exponent),
+ // a closing parenthesis or a unit.
+ self::flush_all($opstack, $output);
+ return $output;
+ }
+
/**
* Stop processing and indicate the human readable position (row/column) where the error occurred.
*
diff --git a/classes/local/token.php b/classes/local/token.php
index 4e34c8c0..922dec20 100644
--- a/classes/local/token.php
+++ b/classes/local/token.php
@@ -123,6 +123,9 @@ class token {
/** @var int used to designate a token storing an end-of-group marker (closing brace) */
const END_GROUP = 4194304;
+ /** @var int used to designate a token storing a unit */
+ const UNIT = 8388608;
+
/** @var mixed the token's content, will be the name for identifiers */
public $value;
diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php
new file mode 100644
index 00000000..0b170817
--- /dev/null
+++ b/classes/local/unit_parser.php
@@ -0,0 +1,256 @@
+.
+
+namespace qtype_formulas\local;
+
+/*
+
+Notes about current implementation:
+
+- only 2 operators allowed: ^ for exponentiation and / for division
+- only one / allowed
+- no * allowed
+- no parens allowed, except in exponent or around the *entire* denominator
+- right side of / is considered in parens, even if not written, e.g. J/m*K --> J / (m*K)
+- only units, no numbers except for exponents
+- positive or negative exponents allowed
+- negative exponents allowed with or without parens
+- same unit not allowed more than once
+
+Future implementation, must be 100% backwards compatible
+
+- allow parens everywhere
+- allow * for explicit multiplication of units
+- still only allow one /
+- still not allow same unit more than once
+- if * is used after /, assume implicit parens, e. g. J / m * K --> J / (m * K)
+- do not allow operators other than *, / and ^ as well as unary - (in exponents only)
+- allow ** instead of ^
+
+*/
+
+
+/**
+ * Parser for units for qtype_formulas
+ *
+ * @package qtype_formulas
+ * @copyright 2025 Philipp Imhof
+ * @license https://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+class unit_parser extends parser {
+
+ /** @var array list of used units */
+ private array $unitlist = [];
+
+ /**
+ * Create a unit parser class and have it parse a given input. The input can be given as a string, in
+ * which case it will first be sent to the lexer. If that step has already been made, the constructor
+ * also accepts a list of tokens.
+ *
+ * @param string|array $tokenlist list of tokens as returned from the lexer or input string
+ */
+ public function __construct($tokenlist) {
+ // If the input is given as a string, run it through the lexer first.
+ if (is_string($tokenlist)) {
+ $lexer = new lexer($tokenlist);
+ $tokenlist = $lexer->get_tokens();
+ }
+ $this->tokenlist = $tokenlist;
+ $this->count = count($tokenlist);
+
+ // Check for unbalanced / mismatched parentheses.
+ $this->check_parens();
+
+ // Whether we have already seen a slash or the number one (except in exponents).
+ $seenslash = false;
+ $seenunit = false;
+ $inexponent = false;
+ foreach ($tokenlist as $token) {
+ // The use of functions is not permitted in units, so all identifiers will be classified
+ // as UNIT tokens.
+ if ($token->type === token::IDENTIFIER) {
+ // If inside an exponent, only numbers (and maybe the unary minus) are allowed.
+ if ($inexponent) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ // The same unit must not be used more than once.
+ if ($this->has_unit_been_used($token)) {
+ $this->die('Unit already used: ' . $token->value, $token);
+ }
+ $this->unitlist[] = $token->value;
+ $token->type = token::UNIT;
+ $seenunit = true;
+ continue;
+ }
+
+ // Do various syntax checks for operators.
+ if ($token->type === token::OPERATOR) {
+ // We can only accept an operator if there has been at least one unit before.
+ if (!$seenunit) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ // The only operators allowed are exponentiation, multiplication, division and the unary minus.
+ // Note that the caret (^) always means exponentiation in the context of units.
+ if (!in_array($token->value, ['^', '**', '/', '*', '-'])) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ // The unary minus is only allowed inside an exponent.
+ if ($token->value === '-' && !$inexponent) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ // Only the unary minus is allowed inside an exponent.
+ if ($inexponent && $token->value !== '-') {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ if ($token->value === '^' || $token->value === '**') {
+ $inexponent = true;
+ }
+ if ($token->value === '/') {
+ if ($seenslash) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ $seenslash = true;
+ }
+ continue;
+ }
+
+ // Numbers can only be used as exponents and exponents must always be integers.
+ if ($token->type === token::NUMBER) {
+ if (!$inexponent) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ if (intval($token->value) != $token->value) {
+ $this->die(get_string('error_integerexpected', 'qtype_formulas', $token->value), $token);
+ }
+ // Only one number is allowed in an exponent, so after the number the
+ // exponent must be finished.
+ $inexponent = false;
+ continue;
+ }
+
+ // Parentheses are allowed, but we don't have to do anything with them now.
+ if (in_array($token->type, [token::OPENING_PAREN, token::CLOSING_PAREN])) {
+ continue;
+ }
+
+ // All other tokens are not allowed.
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+
+ // The last token must be a number, a unit or a closing parenthesis.
+ $finaltoken = end($tokenlist);
+ if (!in_array($finaltoken->type, [token::UNIT, token::NUMBER, token::CLOSING_PAREN])) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+
+ $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist);
+ }
+
+ /**
+ * Check whether a given unit has already been used.
+ *
+ * @param token $token token containing the unit
+ * @return bool
+ */
+ protected function has_unit_been_used(token $token): bool {
+ return in_array($token->value, $this->unitlist);
+ }
+
+ /**
+ * Check whether all parentheses are balanced and whether only round parens are used.
+ * Otherweise, stop all further processing and output an error message.
+ *
+ * @return void
+ */
+ protected function check_parens(): void {
+ $parenstack = [];
+ foreach ($this->tokenlist as $token) {
+ $type = $token->type;
+ // We only allow round parens.
+ if (($token->type & token::ANY_PAREN) && !($token->type & token::OPEN_OR_CLOSE_PAREN)) {
+ $this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
+ if ($type === token::OPENING_PAREN) {
+ $parenstack[] = $token;
+ }
+ if ($type === token::CLOSING_PAREN) {
+ $top = end($parenstack);
+ // If stack is empty, we have a stray closing paren.
+ if (!($top instanceof token)) {
+ $this->die(get_string('error_strayparen', 'qtype_formulas', $token->value), $token);
+ }
+ array_pop($parenstack);
+ }
+ }
+ // If the stack of parentheses is not empty now, we have an unmatched opening parenthesis.
+ if (!empty($parenstack)) {
+ $unmatched = end($parenstack);
+ $this->die(get_string('error_parennotclosed', 'qtype_formulas', $unmatched->value), $unmatched);
+ }
+ }
+
+ /**
+ * Translate the given input into a string that can be understood by the legacy unit parser, i. e.
+ * following all syntax rules. This allows keeping the old unit conversion system in place until
+ * we are readyd to eventually replace it.
+ *
+ * @return string
+ */
+ public function get_legacy_unit_string(): string {
+ $stack = [];
+
+ foreach ($this->statements[0] as $token) {
+ // Write numbers and units to the stack.
+ if (in_array($token->type, [token::UNIT, token::NUMBER])) {
+ $value = $token->value;
+ if (is_numeric($value) && $value < 0) {
+ $value = '(' . strval($value) . ')';
+ }
+ $stack[] = $value;
+ }
+
+ // Operators take arguments from stack and stick them together in the appropriate way.
+ if ($token->type === token::OPERATOR) {
+ $op = $token->value;
+ if ($op === '**') {
+ $op = '^';
+ }
+ if ($op === '*') {
+ $op = ' ';
+ }
+ $second = array_pop($stack);
+ $first = array_pop($stack);
+ // With the new syntax, it is possible to write e.g. (m/s^2)*kg. In older versions,
+ // everything coming after the / operator will be considered a part of the denominator,
+ // so the only way to get the kg into the numerator is to reorder the units and
+ // write them as kg*m/s^2. Long story short: if there is a division, it must come last.
+ // Note that the syntax currently does not allow more than one /, so we do not need
+ // a more sophisticated solution.
+ if (strpos($first, '/') !== false) {
+ list($second, $first) = [$first, $second];
+ }
+ // Legacy syntax allowed parens around the entire denominator, so we do that unless the
+ // denominator is just one unit.
+ if ($op === '/' && !preg_match('/^[A-Za-z]+$/', $second)) {
+ $second = '(' . $second . ')';
+ }
+ $stack[] = $first . $op . $second;
+ }
+ }
+
+ return implode('', $stack);
+ }
+}
diff --git a/lang/en/qtype_formulas.php b/lang/en/qtype_formulas.php
index abb1f57d..9375cbb0 100644
--- a/lang/en/qtype_formulas.php
+++ b/lang/en/qtype_formulas.php
@@ -165,6 +165,7 @@
$string['error_import_missing_field'] = 'Import error. Missing field: {$a} ';
$string['error_in_answer'] = 'Error in answer #{$a->answerno}: {$a->message}';
$string['error_indexoutofrange'] = 'Evaluation error: index {$a} out of range.';
+$string['error_integerexpected'] = 'Syntax error: integer expected, found {$a} instead.';
$string['error_inv_consec'] = 'When using inv(), the numbers in the list must be consecutive.';
$string['error_inv_integers'] = 'inv() expects all elements of the list to be integers; floats will be truncated.';
$string['error_inv_list'] = 'inv() expects a list.';
diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php
new file mode 100644
index 00000000..cbc09031
--- /dev/null
+++ b/tests/unit_parser_test.php
@@ -0,0 +1,176 @@
+.
+
+namespace qtype_formulas;
+
+defined('MOODLE_INTERNAL') || die();
+
+global $CFG;
+require_once($CFG->dirroot . '/question/type/formulas/questiontype.php');
+
+use Exception;
+use qtype_formulas\local\unit_parser;
+
+/**
+ * Unit tests for the unit_parser class.
+ *
+ * @package qtype_formulas
+ * @category test
+ * @copyright 2025 Philipp Imhof
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ *
+ * @covers \qtype_formulas\local\unit_parser
+ */
+final class unit_parser_test extends \advanced_testcase {
+
+ public function test_parse_unit_FIXME_REMOVE_WHEN_FINISHED() {
+ self::assertTrue(true);
+ return;
+ $input = '(m/s)^2';
+ $parser = new unit_parser($input);
+ var_dump($parser->get_statements()[0]);
+
+ echo $parser->get_legacy_unit_string();
+ }
+
+ /**
+ * Test parsing of unit inputs.
+ *
+ * @dataProvider provide_units
+ */
+ public function test_parse_unit($expected, $input) {
+ $e = null;
+ $error = '';
+ try {
+ new unit_parser($input);
+ } catch (Exception $e) {
+ $error = $e->getMessage();
+ }
+
+ // If we are expecting an error message, the exception object should not be null and
+ // the message should match, without checking row and column number.
+ if ($expected[0] === '!') {
+ self::assertNotNull($e);
+ self::assertStringEndsWith(substr($expected, 1), $error);
+ } else {
+ self::assertNull($e);
+ }
+ }
+
+ /**
+ * Test conversion of unit inputs to legacy input format.
+ *
+ * @dataProvider provide_units
+ */
+ public function test_get_legacy_unit_string($expected, $input) {
+ $e = null;
+ try {
+ $parser = new unit_parser($input);
+ } catch (Exception $e) {
+ $e->getMessage();
+ }
+
+ // If we are not expecting an error, check that the input has been translated as expected.
+ if ($expected[0] !== '!') {
+ self::assertEquals($expected, $parser->get_legacy_unit_string());
+ } else {
+ self::assertNotNull($e);
+ }
+ }
+
+ /**
+ * Data provider for the test functions. For simplicity, we use the same provider
+ * for valid and invalid expressions. In case of invalid expressions, we put an
+ * exclamation mark (!) at the start of the error message.
+ *
+ * @return array
+ */
+ public static function provide_units(): array {
+ return [
+ ['J/(m K)', 'J / m K'],
+ ['J/(m K)', 'J / m*K'],
+ ['J/(m K)', 'J / (m K)'],
+ ['J/(m K)', 'J / (m*K)'],
+ ['m kg/(s^2)', 'm kg/s^2'],
+ ['m kg/(s^2)', 'm kg / s^2'],
+ ['m kg/(s^2)', 'm*kg / s^2'],
+ ['m kg/(s^2)', 'm*(kg / s^2)'],
+ ['kg m/(s^2)', '(m/s^2)*kg'],
+ ['kg m/(s^2)', '(m/s^2) kg'],
+ ['m kg/(s^2)', '(m (kg / s^(2)))'],
+ ['m K kg/s', 'm (kg / s) K'],
+ ['s^(-1)', 's^-1'],
+ ['s^2', 's**2'],
+ ['s^(-1)', 's**-1'],
+ ['s^(-1)', 's^(-1)'],
+ ['s^(-1)', 's**(-1)'],
+ ['s^(-1)/(m^(-1))', 's**-1 / m**-1'],
+ ['m', 'm'],
+ ['m', '(m)'],
+ ['km', 'km'],
+ ['m^2', 'm^2'],
+ ['m^2', 'm^(2)'],
+ ['m^2', '(m^2)'],
+ ['m^2', 'm**2'],
+ ['m^2', '(m**2)'],
+ ['m^2', 'm**(2)'],
+ ['m^2', 'm ^ 2'],
+ ['m^2', 'm ^ (2)'],
+ ['m^2', 'm ** 2'],
+ ['m^2', 'm ** (2)'],
+ ['m^(-2)', 'm^-2'],
+ ['m^(-2)', '(m^-2)'],
+ ['m^(-2)', 'm^(-2)'],
+ ['m^(-2)', 'm ^ -2'],
+ ['m^(-2)', 'm ^ (-2)'],
+ ['m/s', 'm/s'],
+ ['m/s', '(m)/(s)'],
+ ['m/s', '(m/s)'],
+ ['m s^(-1)', 'm s^-1'],
+ ['m s^(-1)', 'm (s^-1)'],
+ ['m s^(-1)', 'm (s^(-1))'],
+ ['m s^(-1)', 'm s^(-1)'],
+ ['m/(s^(-1))', 'm / (s^(-1))'],
+ ['m/(s^(-1))', 'm / ((s^(-1)))'],
+ ['kg m/s', 'kg m/s'],
+ ['kg m/s', 'kg (m/s)'],
+ ['kg m/s', 'kg*(m/s)'],
+ ['kg m/s', 'kg*m/s'],
+ ['kg m/s', '(kg m)/s'],
+ ['kg m/s', '(kg*m)/s'],
+ ['kg m s^(-1)', 'kg m s^-1'],
+ ['kg m^2', 'kg m^2'],
+ ['kg m^2', 'kg m ^ 2'],
+ ['kg m s^(-1)', 'kg m s ^ - 1'],
+ ['!Unit already used: m', 'm kg / m'],
+ ['!Unexpected token: 1', 'm 1/s'],
+ ['!Unexpected token: 1', '1/s'],
+ ['!Unexpected token: 1', '1 m/s'],
+ ['!Unexpected token: 2', '2/s'],
+ ['!Unexpected token: 2.1', '2.1'],
+ ['!Unexpected token: ^', '^2'],
+ ['!Unexpected token: *', '*s'],
+ ['!Unexpected token: *', 'm* *kg'],
+ ['!Unexpected token: /', '/s'],
+ ['!Unexpected token: *', 'm*'],
+ ['!Unexpected token: /', 'm/'],
+ ['!Unexpected token: ^', 'm^'],
+ ['!Unexpected token: /', 'm^(/2)'],
+ ['!Unexpected token: +', 'm^+2'],
+ ["!Unexpected input: '@'", '@'],
+ ];
+ }
+}
diff --git a/version.php b/version.php
index 7d5ff3a9..d55c6d91 100644
--- a/version.php
+++ b/version.php
@@ -25,7 +25,7 @@
defined('MOODLE_INTERNAL') || die();
$plugin->component = 'qtype_formulas';
-$plugin->version = 2025021400;
+$plugin->version = 2025021402;
$plugin->cron = 0;
$plugin->requires = 2022112800;
From ec8e47ff36c27455d0d3c716c70e21bd74f6d787 Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Sun, 9 Mar 2025 10:06:41 +0100
Subject: [PATCH 2/8] steps to parse unit conversion rules
---
classes/local/shunting_yard.php | 6 ++
classes/local/unit_parser.php | 154 ++++++++++++++++++++++++++++++--
tests/unit_parser_test.php | 25 ++++++
3 files changed, 179 insertions(+), 6 deletions(-)
diff --git a/classes/local/shunting_yard.php b/classes/local/shunting_yard.php
index 2ffd79dc..03a8855c 100644
--- a/classes/local/shunting_yard.php
+++ b/classes/local/shunting_yard.php
@@ -622,6 +622,12 @@ public static function unit_infix_to_rpn($tokens): array {
if ($value === '^') {
$value = '**';
}
+ // Exponents cannot follow a closing parenthesis, because things like (m/s)^2 cannot
+ // be translated to legacy syntax before "unit arithmetic" is fully implemented. We use
+ // $token->value instead of $value to have the operator like it was entered.
+ if ($value === '**' && $lasttype === token::CLOSING_PAREN) {
+ self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
+ }
$thisprecedence = self::get_precedence($value);
// We artificially increase the precedence of the division operator, because
// legacy versions used implicit parens around the denominator, e. g.
diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php
index 0b170817..7435f261 100644
--- a/classes/local/unit_parser.php
+++ b/classes/local/unit_parser.php
@@ -39,6 +39,34 @@
- if * is used after /, assume implicit parens, e. g. J / m * K --> J / (m * K)
- do not allow operators other than *, / and ^ as well as unary - (in exponents only)
- allow ** instead of ^
+- for the moment: disallow exponent after closing paren to avoid things like (m/s)^2
+
+
+Syntax for unit conversion rules
+
+Type 1: SI prefixes
+
+ : ... ;
+
+- at least one valid prefix
+- ; at end, if more statements follow
+- base unit single token, no number
+
+example:
+
+m : k da d c m u µ;
+s : m u µ;
+
+Type 2: conversion factors / arbitrary prefixes
+
+[] = [ = ...] ;
+
+- if first number not given --> 1
+- all further declarations always relative to base unit
+
+min = 60 s;
+h = 60 min;
+
*/
@@ -52,9 +80,77 @@
*/
class unit_parser extends parser {
+ // FIXME: allow unicode chars for micro and ohm in lexer; maybe disallow for variable names (check during assignment)
+
+ const SI_PREFIX_FACTORS = [
+ 'd' => 0.1,
+ 'c' => 0.01,
+ 'm' => 0.001,
+ 'u' => 1e-6,
+ // For convenience, we also allow U+00B5 MICRO SIGN.
+ "\u{00B5}" => 1e-6,
+ // For convenience, we also allow U+03BC GREEK SMALL LETTER MU.
+ "\u{03BC}" => 1e-6,
+ 'n' => 1e-9,
+ 'p' => 1e-12,
+ 'f' => 1e-15,
+ 'a' => 1e-18,
+ 'z' => 1e-21,
+ 'y' => 1e-24,
+ 'r' => 1e-27,
+ 'q' => 1e-30,
+ 'da' => 10,
+ 'h' => 100,
+ 'k' => 1000,
+ 'M' => 1e6,
+ 'G' => 1e9,
+ 'T' => 1e12,
+ 'P' => 1e15,
+ 'E' => 1e18,
+ 'Z' => 1e21,
+ 'Y' => 1e24,
+ 'R' => 1e27,
+ 'Q' => 1e30,
+ ];
+
+ const DEFAULT_PREFIXES = [
+ 's' => ['m', 'u', 'n', 'p', 'f'],
+ 'm' => ['k', 'da', 'c', 'd', 'm', 'u', 'n', 'p', 'f'],
+ 'g' => ['k', 'm', 'u', 'n', 'p', 'f'],
+ 'A' => ['m', 'u', 'n', 'p', 'f'],
+ 'mol' => ['m', 'u', 'n', 'p'],
+ 'K' => ['m', 'u', 'n', 'k', 'M'],
+ 'cd' => ['m', 'k', 'M', 'u', 'G'],
+ 'N' => ['M', 'k', 'm', 'u', 'n', 'p', 'f'],
+ 'J' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'],
+ 'eV' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'],
+ 'W' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'],
+ 'Pa' => ['M', 'G', 'T', 'P', 'k', 'h'],
+ 'Hz' => ['M', 'G', 'T', 'P', 'E', 'k'],
+ 'C' => ['k', 'm', 'u', 'n', 'p', 'f'],
+ 'V' => ['M', 'G', 'k', 'm', 'u', 'n', 'p', 'f'],
+ 'ohm' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'],
+ 'F' => ['m', 'u', 'n', 'p', 'f'],
+ 'T' => ['k', 'm', 'u', 'n', 'p'],
+ 'H' => ['k', 'm', 'u', 'n', 'p'],
+ ];
+
+ const DEFAULT_SPECIAL_RULES = [
+ // For convenience, we also allow U+2126 OHM SIGN.
+ "\u{2126}" => ['ohm' => 1],
+ // For convenience, we also allow U+03A9 GREEK CAPITAL LETTER OMEGA.
+ "\u{03A9}" => ['ohm' => 1],
+ 'min' => ['s' => 60],
+ 'h' => ['s' => 3600],
+ 'J' => ['eV' => 6.24150947e+18],
+ ];
+
/** @var array list of used units */
private array $unitlist = [];
+ /** @var string list of all units with their prefixes, allowing to find base units quickly */
+ private string $baseunitmap = '';
+
/**
* Create a unit parser class and have it parse a given input. The input can be given as a string, in
* which case it will first be sent to the lexer. If that step has already been made, the constructor
@@ -69,16 +165,29 @@ public function __construct($tokenlist) {
$tokenlist = $lexer->get_tokens();
}
$this->tokenlist = $tokenlist;
- $this->count = count($tokenlist);
// Check for unbalanced / mismatched parentheses.
$this->check_parens();
- // Whether we have already seen a slash or the number one (except in exponents).
+ // Perform basic syntax check, including classification of IDENTIFIER tokens
+ // to UNIT tokens.
+ $this->check_syntax();
+
+ // Run the tokens through an adapted shunting yard algorithm to bring them into
+ // RPN notation.
+ $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist);
+
+ // Build base unit map that will be used to find the base unit for a given unit,
+ // e. g. find s from ms or Pa from hPa.
+ $this->build_base_unit_map();
+ }
+
+ protected function check_syntax(): void {
+ // Whether we have already seen a slash or a unit and whether we are in an exponent.
$seenslash = false;
$seenunit = false;
$inexponent = false;
- foreach ($tokenlist as $token) {
+ foreach ($this->tokenlist as $token) {
// The use of functions is not permitted in units, so all identifiers will be classified
// as UNIT tokens.
if ($token->type === token::IDENTIFIER) {
@@ -96,7 +205,8 @@ public function __construct($tokenlist) {
continue;
}
- // Do various syntax checks for operators.
+ // Do various syntax checks for operators. We do them separately in order to allow
+ // for more specific error messages, if needed.
if ($token->type === token::OPERATOR) {
// We can only accept an operator if there has been at least one unit before.
if (!$seenunit) {
@@ -151,12 +261,44 @@ public function __construct($tokenlist) {
}
// The last token must be a number, a unit or a closing parenthesis.
- $finaltoken = end($tokenlist);
+ $finaltoken = end($this->tokenlist);
if (!in_array($finaltoken->type, [token::UNIT, token::NUMBER, token::CLOSING_PAREN])) {
$this->die(get_string('error_unexpectedtoken', 'qtype_formulas', $token->value), $token);
}
+ }
- $this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist);
+ protected function build_base_unit_map(): void {
+ // First, we add all the built-in default prefix rules.
+ foreach (self::DEFAULT_PREFIXES as $base => $prefixes) {
+ foreach ($prefixes as $prefix) {
+ $this->baseunitmap .= '|' . $prefix . $base . ':' . $base;
+ }
+ }
+
+ // Next, the built-in special prefix rules.
+
+
+ // Finally, we add user-defined rules.
+ }
+
+ protected function find_base_unit(string $unit): string {
+ // Example, must be built from config / rules. Format "pipe - unit with prefix - colon - base unit".
+ // Our definitions first, user's definition last.
+ $map = '|s:s|ms:s|us:s|µs:s|cm:m|dm:m|hPa:Pa|kg:g';
+
+ $matches = [];
+ preg_match_all('/\|' . $unit . ':([^|]+)/', $map, $matches);
+
+ // Array $matches has two entries, $matches[0] are full pattern matches (e.g. '|ms:s') and
+ // $matches[1] are matches of base units. If there are no matches at all, the unit was not found.
+ // This cannot normally happen. If it does, we return the unit as-is.
+ if (count($matches[1]) === 0) {
+ return $unit;
+ }
+
+ // In all other cases, we return the last possible match. In most cases there will be only one,
+ // but if there are more than one, the user-defined unit should be taken.
+ return end($matches[1]);
}
/**
diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php
index cbc09031..2820d0a8 100644
--- a/tests/unit_parser_test.php
+++ b/tests/unit_parser_test.php
@@ -22,6 +22,8 @@
require_once($CFG->dirroot . '/question/type/formulas/questiontype.php');
use Exception;
+use qtype_formulas\local\lexer;
+use qtype_formulas\local\token;
use qtype_formulas\local\unit_parser;
/**
@@ -33,6 +35,7 @@
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*
* @covers \qtype_formulas\local\unit_parser
+ * @covers \qtype_formulas\local\shunting_yard
*/
final class unit_parser_test extends \advanced_testcase {
@@ -46,6 +49,26 @@ public function test_parse_unit_FIXME_REMOVE_WHEN_FINISHED() {
echo $parser->get_legacy_unit_string();
}
+ public function test_parse_unit_rules_FIXME() {
+ $input = 'm: k da d c m u; s: m u; min = 60 s;';
+
+
+
+ $lexer = new lexer($input);
+ $tokens = $lexer->get_tokens();
+ var_dump($tokens);
+
+ $rules = [];
+ foreach ($tokens as $token) {
+
+
+ if ($token->type === token::END_OF_STATEMENT) {
+ $rules[] = $currentrule;
+ }
+ }
+
+ }
+
/**
* Test parsing of unit inputs.
*
@@ -170,6 +193,8 @@ public static function provide_units(): array {
['!Unexpected token: ^', 'm^'],
['!Unexpected token: /', 'm^(/2)'],
['!Unexpected token: +', 'm^+2'],
+ ['!Unexpected token: ^', '(m/s)^2'],
+ ['!Unexpected token: **', '(m/s)**2'],
["!Unexpected input: '@'", '@'],
];
}
From 9ff673af49fe60f7c15f858658ee279fcd105f6b Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Sun, 13 Apr 2025 10:57:59 +0200
Subject: [PATCH 3/8] prepare compatibility layer for units
---
classes/local/unit_parser.php | 173 ++--------------------------------
tests/unit_parser_test.php | 32 -------
2 files changed, 10 insertions(+), 195 deletions(-)
diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php
index 7435f261..97feb22b 100644
--- a/classes/local/unit_parser.php
+++ b/classes/local/unit_parser.php
@@ -16,61 +16,6 @@
namespace qtype_formulas\local;
-/*
-
-Notes about current implementation:
-
-- only 2 operators allowed: ^ for exponentiation and / for division
-- only one / allowed
-- no * allowed
-- no parens allowed, except in exponent or around the *entire* denominator
-- right side of / is considered in parens, even if not written, e.g. J/m*K --> J / (m*K)
-- only units, no numbers except for exponents
-- positive or negative exponents allowed
-- negative exponents allowed with or without parens
-- same unit not allowed more than once
-
-Future implementation, must be 100% backwards compatible
-
-- allow parens everywhere
-- allow * for explicit multiplication of units
-- still only allow one /
-- still not allow same unit more than once
-- if * is used after /, assume implicit parens, e. g. J / m * K --> J / (m * K)
-- do not allow operators other than *, / and ^ as well as unary - (in exponents only)
-- allow ** instead of ^
-- for the moment: disallow exponent after closing paren to avoid things like (m/s)^2
-
-
-Syntax for unit conversion rules
-
-Type 1: SI prefixes
-
- : ... ;
-
-- at least one valid prefix
-- ; at end, if more statements follow
-- base unit single token, no number
-
-example:
-
-m : k da d c m u µ;
-s : m u µ;
-
-Type 2: conversion factors / arbitrary prefixes
-
-[] = [ = ...] ;
-
-- if first number not given --> 1
-- all further declarations always relative to base unit
-
-min = 60 s;
-h = 60 min;
-
-
-*/
-
-
/**
* Parser for units for qtype_formulas
*
@@ -80,77 +25,9 @@
*/
class unit_parser extends parser {
- // FIXME: allow unicode chars for micro and ohm in lexer; maybe disallow for variable names (check during assignment)
-
- const SI_PREFIX_FACTORS = [
- 'd' => 0.1,
- 'c' => 0.01,
- 'm' => 0.001,
- 'u' => 1e-6,
- // For convenience, we also allow U+00B5 MICRO SIGN.
- "\u{00B5}" => 1e-6,
- // For convenience, we also allow U+03BC GREEK SMALL LETTER MU.
- "\u{03BC}" => 1e-6,
- 'n' => 1e-9,
- 'p' => 1e-12,
- 'f' => 1e-15,
- 'a' => 1e-18,
- 'z' => 1e-21,
- 'y' => 1e-24,
- 'r' => 1e-27,
- 'q' => 1e-30,
- 'da' => 10,
- 'h' => 100,
- 'k' => 1000,
- 'M' => 1e6,
- 'G' => 1e9,
- 'T' => 1e12,
- 'P' => 1e15,
- 'E' => 1e18,
- 'Z' => 1e21,
- 'Y' => 1e24,
- 'R' => 1e27,
- 'Q' => 1e30,
- ];
-
- const DEFAULT_PREFIXES = [
- 's' => ['m', 'u', 'n', 'p', 'f'],
- 'm' => ['k', 'da', 'c', 'd', 'm', 'u', 'n', 'p', 'f'],
- 'g' => ['k', 'm', 'u', 'n', 'p', 'f'],
- 'A' => ['m', 'u', 'n', 'p', 'f'],
- 'mol' => ['m', 'u', 'n', 'p'],
- 'K' => ['m', 'u', 'n', 'k', 'M'],
- 'cd' => ['m', 'k', 'M', 'u', 'G'],
- 'N' => ['M', 'k', 'm', 'u', 'n', 'p', 'f'],
- 'J' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'],
- 'eV' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'],
- 'W' => ['M', 'G', 'T', 'P', 'k', 'm', 'u', 'n', 'p', 'f'],
- 'Pa' => ['M', 'G', 'T', 'P', 'k', 'h'],
- 'Hz' => ['M', 'G', 'T', 'P', 'E', 'k'],
- 'C' => ['k', 'm', 'u', 'n', 'p', 'f'],
- 'V' => ['M', 'G', 'k', 'm', 'u', 'n', 'p', 'f'],
- 'ohm' => ['M', 'G', 'T', 'P', 'k', 'm', 'u'],
- 'F' => ['m', 'u', 'n', 'p', 'f'],
- 'T' => ['k', 'm', 'u', 'n', 'p'],
- 'H' => ['k', 'm', 'u', 'n', 'p'],
- ];
-
- const DEFAULT_SPECIAL_RULES = [
- // For convenience, we also allow U+2126 OHM SIGN.
- "\u{2126}" => ['ohm' => 1],
- // For convenience, we also allow U+03A9 GREEK CAPITAL LETTER OMEGA.
- "\u{03A9}" => ['ohm' => 1],
- 'min' => ['s' => 60],
- 'h' => ['s' => 3600],
- 'J' => ['eV' => 6.24150947e+18],
- ];
-
/** @var array list of used units */
private array $unitlist = [];
- /** @var string list of all units with their prefixes, allowing to find base units quickly */
- private string $baseunitmap = '';
-
/**
* Create a unit parser class and have it parse a given input. The input can be given as a string, in
* which case it will first be sent to the lexer. If that step has already been made, the constructor
@@ -176,12 +53,14 @@ public function __construct($tokenlist) {
// Run the tokens through an adapted shunting yard algorithm to bring them into
// RPN notation.
$this->statements[] = shunting_yard::unit_infix_to_rpn($this->tokenlist);
-
- // Build base unit map that will be used to find the base unit for a given unit,
- // e. g. find s from ms or Pa from hPa.
- $this->build_base_unit_map();
}
+ /**
+ * Check if the unit expression respects all syntax constraints, i. e. only valid operators (division,
+ * multiplication, exponentiation), valid use of parentheses, only one division operator. We only allow
+ * stuff that can be converted into the legacy syntax, because we still rely on the original unit
+ * conversion code for now. *
+ */
protected function check_syntax(): void {
// Whether we have already seen a slash or a unit and whether we are in an exponent.
$seenslash = false;
@@ -267,42 +146,10 @@ protected function check_syntax(): void {
}
}
- protected function build_base_unit_map(): void {
- // First, we add all the built-in default prefix rules.
- foreach (self::DEFAULT_PREFIXES as $base => $prefixes) {
- foreach ($prefixes as $prefix) {
- $this->baseunitmap .= '|' . $prefix . $base . ':' . $base;
- }
- }
-
- // Next, the built-in special prefix rules.
-
-
- // Finally, we add user-defined rules.
- }
-
- protected function find_base_unit(string $unit): string {
- // Example, must be built from config / rules. Format "pipe - unit with prefix - colon - base unit".
- // Our definitions first, user's definition last.
- $map = '|s:s|ms:s|us:s|µs:s|cm:m|dm:m|hPa:Pa|kg:g';
-
- $matches = [];
- preg_match_all('/\|' . $unit . ':([^|]+)/', $map, $matches);
-
- // Array $matches has two entries, $matches[0] are full pattern matches (e.g. '|ms:s') and
- // $matches[1] are matches of base units. If there are no matches at all, the unit was not found.
- // This cannot normally happen. If it does, we return the unit as-is.
- if (count($matches[1]) === 0) {
- return $unit;
- }
-
- // In all other cases, we return the last possible match. In most cases there will be only one,
- // but if there are more than one, the user-defined unit should be taken.
- return end($matches[1]);
- }
-
/**
- * Check whether a given unit has already been used.
+ * Check whether a given unit has already been used. As we currently still use the original code for
+ * unit conversion stuff, we stick to the same behaviour. Hence, we do not take into account the prefixes,
+ * i. e. km and m will be considered as different units.
*
* @param token $token token containing the unit
* @return bool
@@ -347,7 +194,7 @@ protected function check_parens(): void {
/**
* Translate the given input into a string that can be understood by the legacy unit parser, i. e.
* following all syntax rules. This allows keeping the old unit conversion system in place until
- * we are readyd to eventually replace it.
+ * we are ready to eventually replace it.
*
* @return string
*/
diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php
index 2820d0a8..ab3b9b45 100644
--- a/tests/unit_parser_test.php
+++ b/tests/unit_parser_test.php
@@ -22,8 +22,6 @@
require_once($CFG->dirroot . '/question/type/formulas/questiontype.php');
use Exception;
-use qtype_formulas\local\lexer;
-use qtype_formulas\local\token;
use qtype_formulas\local\unit_parser;
/**
@@ -39,36 +37,6 @@
*/
final class unit_parser_test extends \advanced_testcase {
- public function test_parse_unit_FIXME_REMOVE_WHEN_FINISHED() {
- self::assertTrue(true);
- return;
- $input = '(m/s)^2';
- $parser = new unit_parser($input);
- var_dump($parser->get_statements()[0]);
-
- echo $parser->get_legacy_unit_string();
- }
-
- public function test_parse_unit_rules_FIXME() {
- $input = 'm: k da d c m u; s: m u; min = 60 s;';
-
-
-
- $lexer = new lexer($input);
- $tokens = $lexer->get_tokens();
- var_dump($tokens);
-
- $rules = [];
- foreach ($tokens as $token) {
-
-
- if ($token->type === token::END_OF_STATEMENT) {
- $rules[] = $currentrule;
- }
- }
-
- }
-
/**
* Test parsing of unit inputs.
*
From 0c5c72234a69d5fb9955c05e065ce72da23ec780 Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Sun, 13 Apr 2025 11:24:01 +0200
Subject: [PATCH 4/8] don't change version
---
version.php | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/version.php b/version.php
index d55c6d91..7d5ff3a9 100644
--- a/version.php
+++ b/version.php
@@ -25,7 +25,7 @@
defined('MOODLE_INTERNAL') || die();
$plugin->component = 'qtype_formulas';
-$plugin->version = 2025021402;
+$plugin->version = 2025021400;
$plugin->cron = 0;
$plugin->requires = 2022112800;
From d912860399f129065156fe3f59a75222c18b80b2 Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Sun, 13 Apr 2025 15:04:44 +0200
Subject: [PATCH 5/8] added tests, cleanup
---
classes/local/shunting_yard.php | 9 ---------
tests/unit_parser_test.php | 28 ++++++++++++++++++++++++----
2 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/classes/local/shunting_yard.php b/classes/local/shunting_yard.php
index 03a8855c..e4324dd8 100644
--- a/classes/local/shunting_yard.php
+++ b/classes/local/shunting_yard.php
@@ -609,10 +609,6 @@ public static function unit_infix_to_rpn($tokens): array {
// Deal with all the possible operators...
case token::OPERATOR:
- // Expressions must not start with an operator.
- if (is_null($lasttoken)) {
- self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
- }
// Operators must not follow an opening parenthesis, except for the unary minus.
if ($lasttype === token::OPENING_PAREN && $value !== '-') {
self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
@@ -646,11 +642,6 @@ public static function unit_infix_to_rpn($tokens): array {
// Put the operator on the stack.
$opstack[] = $token;
break;
-
- // If we still haven't dealt with the token, there must be a problem with the input.
- default:
- self::die(get_string('error_unexpectedtoken', 'qtype_formulas', $value), $token);
-
}
$lasttoken = $token;
diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php
index ab3b9b45..15150729 100644
--- a/tests/unit_parser_test.php
+++ b/tests/unit_parser_test.php
@@ -42,7 +42,7 @@ final class unit_parser_test extends \advanced_testcase {
*
* @dataProvider provide_units
*/
- public function test_parse_unit($expected, $input) {
+ public function test_parse_unit($expected, $input): void {
$e = null;
$error = '';
try {
@@ -66,7 +66,7 @@ public function test_parse_unit($expected, $input) {
*
* @dataProvider provide_units
*/
- public function test_get_legacy_unit_string($expected, $input) {
+ public function test_get_legacy_unit_string($expected, $input): void {
$e = null;
try {
$parser = new unit_parser($input);
@@ -146,7 +146,27 @@ public static function provide_units(): array {
['kg m^2', 'kg m^2'],
['kg m^2', 'kg m ^ 2'],
['kg m s^(-1)', 'kg m s ^ - 1'],
- ['!Unit already used: m', 'm kg / m'],
+ ['!Syntax error: integer expected, found 2.5 instead.', 'm^2.5'],
+ ["!Unbalanced parenthesis, stray ')' found.", 'm/s)'],
+ ["!Unbalanced parenthesis, '(' is never closed.", '(m/s'],
+ ["!Unexpected input: '@'", '@'],
+ ['!Unexpected token: s', 'm^s'],
+ ['!Unexpected token: -', 'm*-s'],
+ ['!Unexpected token: /', 'm/s/K'],
+ ['!Unexpected token: /', 'm/s * kg/m^2'],
+ ['!Unexpected token: π', 'm*π'],
+ ['!Unexpected token: ,', 'm,s'],
+ ['!Unexpected token: [', '[m/s]'],
+ ['!Unexpected token: )', '(m*)'],
+ ['!Unexpected token: +', 'm+km'],
+ ['!Unexpected token: *', '*'],
+ ['!Unexpected token: *', '*m'],
+ ['!Unexpected token: /', '/m'],
+ ['!Unexpected token: *', 'm*(*s)'],
+ ['!Unexpected token: *', '(*m)'],
+ ['!Unexpected token: **', '(**m)'],
+ ['!Unexpected token: ^', '(^m)'],
+ ['!Unexpected token: {', '{m/s}'],
['!Unexpected token: 1', 'm 1/s'],
['!Unexpected token: 1', '1/s'],
['!Unexpected token: 1', '1 m/s'],
@@ -163,7 +183,7 @@ public static function provide_units(): array {
['!Unexpected token: +', 'm^+2'],
['!Unexpected token: ^', '(m/s)^2'],
['!Unexpected token: **', '(m/s)**2'],
- ["!Unexpected input: '@'", '@'],
+ ['!Unit already used: m', 'm kg / m'],
];
}
}
From 1afc557608a08a127d499a1230db5e122c332a05 Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Tue, 15 Apr 2025 09:39:04 +0200
Subject: [PATCH 6/8] include new unit parser in grading and validation
---
question.php | 18 ++++++++++++++++--
questiontype.php | 18 ++++++++++++++++--
2 files changed, 32 insertions(+), 4 deletions(-)
diff --git a/question.php b/question.php
index b05a05b2..23997955 100644
--- a/question.php
+++ b/question.php
@@ -36,6 +36,7 @@
use qtype_formulas\local\random_parser;
use qtype_formulas\local\parser;
use qtype_formulas\local\token;
+use qtype_formulas\local\unit_parser;
use qtype_formulas\unit_conversion_rules;
defined('MOODLE_INTERNAL') || die();
@@ -1481,7 +1482,7 @@ public function grade(array $response, bool $finalsubmit = false): array {
// formulas, we must wrap the answers in quotes before we move on. Also, we reset the conversion
// factor, because it is not needed for algebraic answers.
if ($isalgebraic) {
- $response = self::wrap_algebraic_formulas_in_quotes($response);
+ $response = self::wrap_algebraic_formulas_in_quotes($response);
$conversionfactor = 1;
}
@@ -1572,7 +1573,20 @@ private function is_compatible_unit(string $studentsunit) {
$checkunit->assign_default_rules($this->ruleid, $entry[1]);
$checkunit->assign_additional_rules($this->otherrule);
- $checked = $checkunit->check_convertibility($studentsunit, $this->postunit);
+ // Use the compatibility layer to parse the unit string and convert it into the old format.
+ // If parsing fails, we can immediately return false.
+ try {
+ $parser = new unit_parser($studentsunit);
+ $postunitparser = new unit_parser($this->postunit);
+ } catch (Exception $e) {
+ // TODO: convert to non-capturing catch
+ return false;
+ }
+
+ $checked = $checkunit->check_convertibility(
+ $parser->get_legacy_unit_string(),
+ $postunitparser->get_legacy_unit_string(),
+ );
if ($checked->convertible) {
return $checked->cfactor;
}
diff --git a/questiontype.php b/questiontype.php
index 4d26907e..d8de20f5 100644
--- a/questiontype.php
+++ b/questiontype.php
@@ -31,6 +31,7 @@
use qtype_formulas\local\answer_parser;
use qtype_formulas\local\parser;
use qtype_formulas\local\token;
+use qtype_formulas\local\unit_parser;
defined('MOODLE_INTERNAL') || die();
@@ -1228,9 +1229,22 @@ public function check_variables_and_expressions(object $data, array $parts): obj
// If a unit has been provided, check whether it can be parsed.
if (!empty($part->postunit)) {
try {
- $unitcheck->parse_targets($part->postunit);
+ $unitparser = new unit_parser($part->postunit);
+ $unitcheck->parse_targets($unitparser->get_legacy_unit_string());
} catch (Exception $e) {
- $errors["postunit[$i]"] = get_string('error_unit', 'qtype_formulas');
+ $trace = $e->getTraceAsString();
+ // If we are coming from the newer code, use the detailed error message without
+ // the row/column number. Otherwise just use the generic message provided by the
+ // legacy code.
+ // TODO: Use str_contains() once we drop support for PHP < 8.0.
+ if (strstr($trace, 'unit_parser.php') !== false) {
+ // The error message may contain line and column numbers, but they don't make
+ // sense in this context, so we'd rather remove them.
+ $errors["postunit[$i]"] = preg_replace('/([^:]+:)([^:]+:)/', '', $e->getMessage());
+
+ } else {
+ $errors["postunit[$i]"] = get_string('error_unit', 'qtype_formulas');
+ }
}
}
From fa879718bd9b3ba4a83c56d4f0d46d622b8596db Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Tue, 15 Apr 2025 10:17:51 +0200
Subject: [PATCH 7/8] deal with empty unit input
---
classes/local/unit_parser.php | 7 +++++++
tests/questiontype_test.php | 12 ++++++++++--
tests/unit_parser_test.php | 5 +++--
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/classes/local/unit_parser.php b/classes/local/unit_parser.php
index 97feb22b..165b7017 100644
--- a/classes/local/unit_parser.php
+++ b/classes/local/unit_parser.php
@@ -43,6 +43,13 @@ public function __construct($tokenlist) {
}
$this->tokenlist = $tokenlist;
+ // The unit_parser might have been called on an empty input. If this is the case, we store an
+ // empty statement and stop here.
+ if (empty($this->tokenlist)) {
+ $this->statements[] = [];
+ return;
+ }
+
// Check for unbalanced / mismatched parentheses.
$this->check_parens();
diff --git a/tests/questiontype_test.php b/tests/questiontype_test.php
index 9778eb35..9b993ca0 100644
--- a/tests/questiontype_test.php
+++ b/tests/questiontype_test.php
@@ -467,10 +467,18 @@ public static function provide_single_part_data_for_form_validation(): array {
'correctness' => [0 => '1/0'],
],
],
+ [[], ['postunit' => [0 => 'a/b*c']],
+ ],
+ [
+ ['postunit[0]' => 'Unit already used: m'],
+ [
+ 'postunit' => [0 => 'm*m'],
+ ],
+ ],
[
- ['postunit[0]' => get_string('error_unit', 'qtype_formulas')],
+ ['postunit[0]' => 'Unexpected token: ^'],
[
- 'postunit' => [0 => 'a/b*c'],
+ 'postunit' => [0 => '(m/s)^2'],
],
],
[
diff --git a/tests/unit_parser_test.php b/tests/unit_parser_test.php
index 15150729..1488ab85 100644
--- a/tests/unit_parser_test.php
+++ b/tests/unit_parser_test.php
@@ -53,7 +53,7 @@ public function test_parse_unit($expected, $input): void {
// If we are expecting an error message, the exception object should not be null and
// the message should match, without checking row and column number.
- if ($expected[0] === '!') {
+ if (!empty($expected) && $expected[0] === '!') {
self::assertNotNull($e);
self::assertStringEndsWith(substr($expected, 1), $error);
} else {
@@ -75,7 +75,7 @@ public function test_get_legacy_unit_string($expected, $input): void {
}
// If we are not expecting an error, check that the input has been translated as expected.
- if ($expected[0] !== '!') {
+ if (empty($expected) || $expected[0] !== '!') {
self::assertEquals($expected, $parser->get_legacy_unit_string());
} else {
self::assertNotNull($e);
@@ -91,6 +91,7 @@ public function test_get_legacy_unit_string($expected, $input): void {
*/
public static function provide_units(): array {
return [
+ ['', ''],
['J/(m K)', 'J / m K'],
['J/(m K)', 'J / m*K'],
['J/(m K)', 'J / (m K)'],
From 95093d2e3675316a3531c6c04d37d1819ccebd75 Mon Sep 17 00:00:00 2001
From: Philipp Imhof <52650214+PhilippImhof@users.noreply.github.com>
Date: Tue, 15 Apr 2025 17:48:01 +0200
Subject: [PATCH 8/8] add test for grading with unit compatibility layer
---
tests/question_test.php | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/tests/question_test.php b/tests/question_test.php
index 35242b4f..df1b01af 100644
--- a/tests/question_test.php
+++ b/tests/question_test.php
@@ -800,6 +800,32 @@ public function test_grade_parts_that_can_be_graded_6(): void {
self::assertEquals($expected, $partscores);
}
+ public function test_grade_with_unit_compatibility_layer(): void {
+ $q = $this->get_test_formulas_question('testmethodsinparts');
+
+ // We overwrite the postunit. In the first part, we use the old syntax for the model unit and
+ // the new syntax in the response. In the second part, we use the new syntax for the model
+ // unit and the old syntax in the response.
+ $q->parts[0]->postunit = 'm s';
+ $q->parts[1]->postunit = 'm*s';
+
+ $q->start_attempt(new question_attempt_step(), 1);
+
+ // phpcs:ignore Universal.Arrays.DuplicateArrayKey.Found
+ $response = ['0_' => '40 m*s', '1_0' => '40', '1_1' => 'm s', '2_0' => '40', '3_0' => '40'];
+ $partscores = $q->grade_parts_that_can_be_graded($response, [], false);
+
+ // The latest $response is correct for all parts #0 and #2. Note that the penalty value will
+ // be set according to the question data; it does not mean that a deduction occurred.
+ $expected = [
+ '0' => new qbehaviour_adaptivemultipart_part_result('0', 1, 0.3),
+ '1' => new qbehaviour_adaptivemultipart_part_result('1', 1, 0.3),
+ '2' => new qbehaviour_adaptivemultipart_part_result('2', 1, 0.3),
+ '3' => new qbehaviour_adaptivemultipart_part_result('3', 1, 0.3),
+ ];
+ self::assertEquals($expected, $partscores);
+ }
+
public function test_get_parts_and_weights_singlenum(): void {
$q = $this->get_test_formulas_question('testsinglenum');