Skip to content

Commit 03ec115

Browse files
committed
Fix: Display Unicode correctly in unhandled MXP tags
Fixes #8482. Root cause: two issues prevented non-ASCII characters from displaying: (1) tag content was decoded using Latin-1 instead of the connection encoding; (2) HANDLER_INSERT_ENTITY_LIT mode uses toLatin1(), which destroys Unicode. Solution: added a getEncoding() interface to TMxpClient/TMxpMudlet; use the connection encoding to decode tag content bytes; changed to HANDLER_INSERT_ENTITY_SYS to preserve Unicode characters; added the Qt6::Core5Compat dependency for QTextCodec support in tests.
1 parent 1d0365d commit 03ec115

File tree

5 files changed

+43
-3
lines changed

5 files changed

+43
-3
lines changed

src/TMxpClient.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ class TMxpClient
103103
}
104104

105105
virtual void setCaptionForSendEvent(const QString& caption) { Q_UNUSED(caption) }
106+
107+
// Get the encoding used by the connection (for proper decoding of MXP tags)
108+
// Default implementation returns UTF-8 for test clients
109+
virtual QByteArray getEncoding() const { return QByteArrayLiteral("UTF-8"); }
106110

107111
// Get the console wrap width for layout purposes (e.g., HR tag)
108112
virtual int getWrapWidth() const { return 80; } // Default fallback

src/TMxpMudlet.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ bool TMxpMudlet::isTagAllowedInMode(const QString& tagName, TMXPMode mode) const
265265
return openModeTags.contains(tagName);
266266
}
267267

268+
QByteArray TMxpMudlet::getEncoding() const
269+
{
270+
return mpHost->mTelnet.getEncoding();
271+
}
272+
268273
int TMxpMudlet::getWrapWidth() const
269274
{
270275
// Return the host's configured wrap width, with a sensible minimum

src/TMxpMudlet.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ class TMxpMudlet : public TMxpClient
143143

144144
QStack<TMxpEvent> mPendingSendEvents;
145145

146+
// Get the encoding used by the connection
147+
QByteArray getEncoding() const override;
146148
bool shouldLockModeToSecure() const override;
147149

148150
private:

src/TMxpProcessor.cpp

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "TMxpProcessor.h"
2424
#include <QDebug>
25+
#include <QTextCodec>
2526

2627
bool TMxpProcessor::setMode(const QString& code)
2728
{
@@ -163,7 +164,7 @@ TMxpProcessingResult TMxpProcessor::processMxpInput(char& ch, bool resolveCustom
163164
&& mMxpTagBuilder.isInsideTag()
164165
&& !mMxpTagBuilder.isQuotedSequence()
165166
&& !mMxpTagBuilder.isInsideComment()) {
166-
lastEntityValue = QStringLiteral("<") + QString::fromStdString(mMxpTagBuilder.getRawTagContent());
167+
lastEntityValue = qsl("<") + QString::fromStdString(mMxpTagBuilder.getRawTagContent());
167168
mMxpTagBuilder.resetForNewTag();
168169
return HANDLER_INSERT_ENTITY_LIT;
169170
}
@@ -173,6 +174,30 @@ TMxpProcessingResult TMxpProcessor::processMxpInput(char& ch, bool resolveCustom
173174
}
174175

175176
if (mMxpTagBuilder.hasTag()) {
177+
// Save raw tag content before it gets cleared by buildTag()
178+
// Note: getRawTagContent() returns content INCLUDING the closing '>'
179+
const std::string rawTagBytes = mMxpTagBuilder.getRawTagContent();
180+
const QByteArray encoding = mpMxpClient->getEncoding();
181+
182+
// Build the tag content string with proper encoding
183+
QString rawTagContent = qsl("<");
184+
185+
// Decode the raw bytes using the proper encoding
186+
if (encoding == qsl("UTF-8")) {
187+
rawTagContent += QString::fromStdString(rawTagBytes);
188+
} else if (encoding == qsl("ISO 8859-1")) {
189+
rawTagContent += QString::fromLatin1(rawTagBytes.c_str(), static_cast<int>(rawTagBytes.length()));
190+
} else {
191+
// For other encodings (GBK, BIG5, EUC-KR, etc.), use QTextCodec
192+
QTextCodec* codec = QTextCodec::codecForName(encoding);
193+
if (codec) {
194+
rawTagContent += codec->toUnicode(rawTagBytes.c_str(), static_cast<int>(rawTagBytes.length()));
195+
} else {
196+
// Fallback to UTF-8
197+
rawTagContent += QString::fromStdString(rawTagBytes);
198+
}
199+
}
200+
176201
QScopedPointer<MxpTag> const tag(mMxpTagBuilder.buildTag());
177202

178203
// qDebug() << "TAG RECEIVED: " << tag->asString();
@@ -183,9 +208,11 @@ TMxpProcessingResult TMxpProcessor::processMxpInput(char& ch, bool resolveCustom
183208
TMxpTagHandlerResult const result = mMxpTagProcessor.handleTag(mMxpTagProcessor, *mpMxpClient, tag.get());
184209

185210
// If tag was not handled (not valid MXP and not a custom element), display it as-is
211+
// Use HANDLER_INSERT_ENTITY_SYS so the Unicode content is inserted directly
212+
// without being reprocessed through toLatin1() which would destroy non-ASCII chars
186213
if (result == MXP_TAG_NOT_HANDLED) {
187-
lastEntityValue = tag->toString();
188-
return HANDLER_INSERT_ENTITY_LIT;
214+
lastEntityValue = rawTagContent;
215+
return HANDLER_INSERT_ENTITY_SYS;
189216
}
190217

191218
return result == MXP_TAG_COMMIT_LINE ? HANDLER_COMMIT_LINE : HANDLER_NEXT_CHAR;

test/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ ENABLE_TESTING()
1313

1414
find_package(Qt6 6.8.2 REQUIRED
1515
COMPONENTS Core
16+
Core5Compat
1617
Multimedia
1718
MultimediaWidgets
1819
Network
@@ -26,6 +27,7 @@ link_libraries(
2627
Qt6::Test
2728
Qt6::Concurrent
2829
Qt6::Core
30+
Qt6::Core5Compat
2931
Qt6::Network
3032
Qt6::Multimedia
3133
Qt6::MultimediaWidgets

0 commit comments

Comments
 (0)