From 7dad6af3b190ea42b9f1c5de7728f8c3f36ba5f6 Mon Sep 17 00:00:00 2001 From: Kevin Jilissen Date: Tue, 25 Nov 2025 08:04:44 +0100 Subject: [PATCH 1/2] Re-encode submission source in UTF-8 Try to re-encode the submission source if it currently is not valid in UTF-8. Add some extra safeguard by forcing another sanity check re-encode from UTF-8 to the original encoding, which filters out binary blobs and verifies that the re-encoding is non-destructive. --- .../Controller/Jury/SubmissionController.php | 2 +- webapp/src/Twig/TwigExtension.php | 4 ++-- webapp/src/Utils/Utils.php | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/webapp/src/Controller/Jury/SubmissionController.php b/webapp/src/Controller/Jury/SubmissionController.php index 0bd7bac6dc..3097765723 100644 --- a/webapp/src/Controller/Jury/SubmissionController.php +++ b/webapp/src/Controller/Jury/SubmissionController.php @@ -920,7 +920,7 @@ public function sourceAction( $files[$f->getFilename()][$submitId] = [ 'rank' => $f->getRank(), 'filename' => $f->getFilename(), - 'source' => mb_check_encoding($f->getSourcecode(), 'UTF-8') ? $f->getSourcecode() : "Could not display file as UTF-8, is it binary?", + 'source' => Utils::reencodeUtf8($f->getSourcecode()), ]; // Keep track of the single filename within a submission for handling renaming. diff --git a/webapp/src/Twig/TwigExtension.php b/webapp/src/Twig/TwigExtension.php index fcfae5a4b4..20faf786d6 100644 --- a/webapp/src/Twig/TwigExtension.php +++ b/webapp/src/Twig/TwigExtension.php @@ -892,7 +892,7 @@ public function codeEditor( HTML; $rank = $index; $id = sprintf('editor%s', $rank); - $source = mb_check_encoding($code, 'UTF-8') ? 
$code : "Could not display file as UTF-8, is it binary?";
+        $source = Utils::reencodeUtf8($code);
         if ($elementToUpdate) {
             $extraForEdit = << {
@@ -942,7 +942,7 @@ public function getMonacoModel(SubmissionFile $file): string
         }
         $this->renderedSources[$file->getSubmitfileid()] = true;
 
-        $source = mb_check_encoding($file->getSourcecode(), 'UTF-8') ? $file->getSourcecode() : "Could not display file as UTF-8, is it binary?";
+        $source = Utils::reencodeUtf8($file->getSourcecode());
         return sprintf(
             << */
From 224dc7e28832b291313e629eae31d8c10868c2e6 Mon Sep 17 00:00:00 2001
From: Kevin Jilissen
Date: Thu, 27 Nov 2025 10:56:01 +0100
Subject: [PATCH 2/2] Add a configuration check for encoding detection

---
 webapp/src/Service/CheckConfigService.php | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/webapp/src/Service/CheckConfigService.php b/webapp/src/Service/CheckConfigService.php
index 476038a9b3..5f1c76f4b3 100644
--- a/webapp/src/Service/CheckConfigService.php
+++ b/webapp/src/Service/CheckConfigService.php
@@ -52,6 +52,7 @@ public function runAll(): array
             'php_version' => $this->checkPhpVersion(),
             'php_extensions' => $this->checkPhpExtensions(),
             'php_settings' => $this->checkPhpSettings(),
+            'mbstring_settings' => $this->checkMbstringSettings(),
             'mysql_settings' => $this->checkMysqlSettings(),
         ];
 
@@ -197,6 +198,34 @@ public function checkPhpSettings(): ConfigCheckItem
         );
     }
 
+    /**
+     * Check that `UTF-8` is part of PHP's multibyte encoding detect order.
+     *
+     * The description lists the current `mb_detect_order()` value and every
+     * encoding reported by `mb_list_encodings()`. The result is 'O' when
+     * `UTF-8` is in the detect order and 'W' when it is not.
+     */
+    public function checkMbstringSettings(): ConfigCheckItem
+    {
+        $this->stopwatch->start(__FUNCTION__);
+
+        // Read the detect order once so the message and the result agree.
+        $detectOrder = mb_detect_order();
+
+        $desc = "\nFor submission source code displaying, the detect order should at least contain all expected submission encodings:\n";
+        $desc .= sprintf(" - `mbstring.detect_order` should at least contain `UTF-8` (now set to `%s`).\n", implode(', ', $detectOrder));
+        $desc .= sprintf(" - Supported encodings: `%s`.\n", implode(', ', mb_list_encodings()));
+        // Strict match: only the exact string 'UTF-8' counts.
+        $result = in_array('UTF-8', $detectOrder, true) ? 'O' : 'W';
+
+        $this->stopwatch->stop(__FUNCTION__);
+        return new ConfigCheckItem(
+            caption: 'PHP multibyte string settings',
+            result: $result,
+            desc: $desc
+        );
+    }
+
     public function checkMysqlSettings(): ConfigCheckItem
     {
         $this->stopwatch->start(__FUNCTION__);