Skip to content

Commit

Permalink
Text encodings WIP.
Browse files Browse the repository at this point in the history
  • Loading branch information
SpartanJ committed May 10, 2024
1 parent 6b8da50 commit f8d466d
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 20 deletions.
1 change: 1 addition & 0 deletions include/eepp/ui/doc/textformat.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class EE_API TextFormat {
UTF16LE = String::hash( "UTF-16 LE" ),
UTF16BE = String::hash( "UTF-16 BE" ),
Latin1 = String::hash( "ISO-8859-1" ),
Shift_JIS = String::hash( "Shift_JIS" ),
};

enum class LineEnding { LF, CRLF, CR };
Expand Down
5 changes: 5 additions & 0 deletions include/eepp/window/platformhelper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ class EE_API PlatformHelper {
*/
virtual bool openURL( const std::string& url ) = 0;

/** Converts a byte buffer from one character encoding to another.
 * @param tocode Name of the encoding to convert to.
 * @param fromcode Name of the encoding the input is in.
 * @param inbuf Input bytes to convert.
 * @param inbytesleft Size of the input, in bytes.
 * @return The converted buffer. Callers are expected to release it with iconvFree().
 * NOTE(review): callers appear to treat a null return as a conversion failure — confirm that
 * every backend honours this.
 */
virtual char* iconv( const char* tocode, const char* fromcode, const char* inbuf,
size_t inbytesleft ) = 0;

/** Releases a buffer previously returned by iconv().
 * @param buf Buffer to release.
 */
virtual void iconvFree( char* buf ) = 0;

#if EE_PLATFORM == EE_PLATFORM_ANDROID
/** @return The Activity object for the application */
virtual void* getActivity() = 0;
Expand Down
29 changes: 26 additions & 3 deletions src/eepp/ui/doc/textdocument.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cstdio>
#include <eepp/core/debug.hpp>
#include <eepp/core/debug.hpp>
#include <eepp/network/uri.hpp>
#include <eepp/system/filesystem.hpp>
#include <eepp/system/iostreamfile.hpp>
Expand All @@ -12,6 +11,7 @@
#include <eepp/ui/doc/syntaxdefinitionmanager.hpp>
#include <eepp/ui/doc/syntaxhighlighter.hpp>
#include <eepp/ui/doc/textdocument.hpp>
#include <eepp/window/engine.hpp>
#include <string>

using namespace std::literals;
Expand Down Expand Up @@ -104,6 +104,17 @@ void TextDocument::resetCursor() {
notifySelectionChanged();
}

/**
 * Converts Shift JIS encoded bytes into a String through the platform helper's iconv.
 * @param shiftJISString The Shift JIS encoded bytes.
 * @return The converted string, or an empty String when the conversion returns null.
 */
String shiftJISToUTF32( const std::string_view& shiftJISString ) {
	char* utf32 = Window::Engine::instance()->getPlatformHelper()->iconv(
		"UTF-32LE", "SHIFT-JIS", shiftJISString.data(), shiftJISString.size() );

	// Conversion failed: hand back an empty string.
	if ( utf32 == nullptr )
		return String();

	// The converted buffer is read as a terminated sequence of String code units and copied
	// into the String before the buffer is released.
	String converted = String( reinterpret_cast<String::StringBaseType*>( utf32 ) );
	Window::Engine::instance()->getPlatformHelper()->iconvFree( utf32 );
	return converted;
}

static constexpr int codepointSize( TextFormat::Encoding enc ) {
switch ( enc ) {
case TextFormat::Encoding::UTF16LE:
Expand Down Expand Up @@ -167,7 +178,9 @@ static String ptrGetLine( char* data, const size_t& size, size_t& position,
position++;
}

if ( enc == TextFormat::Encoding::Latin1 )
if ( enc == TextFormat::Encoding::Shift_JIS )
return shiftJISToUTF32( std::string_view{ data, position } );
else if ( enc == TextFormat::Encoding::Latin1 )
return String::fromLatin1( data, position );

return String( data, position );
Expand Down Expand Up @@ -687,6 +700,7 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) {
MD5::update( md5Ctx, bom, sizeof( bom ) );
break;
}
case TextFormat::Encoding::Shift_JIS:
case TextFormat::Encoding::Latin1:
break;
}
Expand Down Expand Up @@ -764,6 +778,15 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) {
MD5::update( md5Ctx, latin1.data(), latin1.size() );
break;
}
case TextFormat::Encoding::Shift_JIS: {
auto* ret = Window::Engine::instance()->getPlatformHelper()->iconv(
"SHIFT-JIS", "UTF-8", text.c_str(), text.size() );
auto len = strlen( ret );
stream.write( ret, len );
MD5::update( md5Ctx, ret, len );
Window::Engine::instance()->getPlatformHelper()->iconvFree( ret );
break;
}
case TextFormat::Encoding::UTF8: {
stream.write( text.c_str(), text.size() );
MD5::update( md5Ctx, text.data(), text.size() );
Expand Down
82 changes: 78 additions & 4 deletions src/eepp/ui/doc/textformat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,47 @@ template <bool BigEndian> struct UTF16 {
using UTF16_LE = UTF16<false>;
using UTF16_BE = UTF16<true>;

//-------------------------------------------------------------------
// Shift JIS
//-------------------------------------------------------------------
/**
 * Lightweight Shift JIS decoder used while scoring candidate encodings.
 * It only validates byte patterns: single-byte values decode to themselves and a double-byte
 * character is reported as its raw 16-bit Shift JIS code, not as a Unicode code point.
 */
struct ShiftJIS {
	/**
	 * Packs the two bytes at @p src into one 16-bit Shift JIS code, lead byte in the high 8 bits.
	 * Lead-first packing makes the lowest double-byte sequence accepted by decodePoint()
	 * (0x81 0x40) yield 0x8140, which is the threshold the encoding scan compares decoded points
	 * against when counting double-byte characters.
	 * @param src Pointer to at least two readable bytes.
	 * @return ( lead << 8 ) | trail
	 */
	static inline Uint16 getUnit( const char* src ) {
		return Uint16( ( Uint16( Uint8( src[0] ) ) << 8 ) | Uint8( src[1] ) );
	}

	/**
	 * Decodes the character at the start of @p view.
	 * @param view Remaining bytes of the buffer being scanned.
	 * @return A default-constructed result when @p view is empty; otherwise the decoded value,
	 * its validity and the number of bytes consumed (1 or 2). Bytes that do not start a valid
	 * character are consumed one at a time and flagged Invalid.
	 */
	static inline TextDecodeResult decodePoint( std::string_view view ) {
		using ByteRange = std::pair<unsigned char, unsigned char>;
		// Lead (first) byte ranges of a double-byte character.
		static constexpr ByteRange firstByteRange1( 0x81, 0x9F );
		static constexpr ByteRange firstByteRange2( 0xE0, 0xEF );
		// Trail (second) byte ranges of a double-byte character.
		static constexpr ByteRange secondByteRange1( 0x40, 0x7E );
		static constexpr ByteRange secondByteRange2( 0x80, 0xFC );

		const auto within = []( Uint8 byte, const ByteRange& range ) {
			return byte >= range.first && byte <= range.second;
		};

		if ( view.empty() )
			return {};

		const Uint8 first = view[0];

		// 0x00-0x7F (DEL included) are single-byte characters.
		if ( first <= 0x7F )
			return { first, TextDecodeResult::Status::Valid, 1 };

		if ( view.size() < 2 ) {
			// Only one byte is left (e.g. the sample ends in the middle of a character), so
			// there is no trail byte and view[1] must not be read. A byte in 0x80-0xFC is still
			// accepted as a possibly truncated character; anything else is invalid.
			const bool accepted = within( first, secondByteRange2 );
			return { first,
					 accepted ? TextDecodeResult::Status::Valid
							  : TextDecodeResult::Status::Invalid,
					 1 };
		}

		const Uint8 second = view[1];
		const bool validLead =
			within( first, firstByteRange1 ) || within( first, firstByteRange2 );
		const bool validTrail =
			within( second, secondByteRange1 ) || within( second, secondByteRange2 );

		if ( validLead && validTrail )
			return { getUnit( view.data() ), TextDecodeResult::Status::Valid, 2 };

		return { first, TextDecodeResult::Status::Invalid, 1 };
	}
};

//-------------------------------------------------------------------
// UTF8
//-------------------------------------------------------------------
Expand Down Expand Up @@ -193,6 +234,9 @@ template <> struct TextEncoding::Wrapper<UTF16_LE> {
// Explicit specialization for big-endian UTF-16: it only declares the static Instance, which is
// defined further down in this file.
template <> struct TextEncoding::Wrapper<UTF16_BE> {
static TextEncoding Instance;
};
// Explicit specialization for Shift JIS: it only declares the static Instance, which is defined
// further down in this file (presumably what TextEncoding::get<ShiftJIS>() hands out — TODO
// confirm).
template <> struct TextEncoding::Wrapper<ShiftJIS> {
static TextEncoding Instance;
};

//-------------------------------------------------------------------
// TextEncoding (indirect through function vectors)
Expand All @@ -217,6 +261,11 @@ TextEncoding TextEncoding::Wrapper<UTF16_BE>::Instance = {
2,
};

// Shift JIS encoding descriptor: the decode function followed by a second field of 1 (the UTF-16
// descriptors use 2 there; presumably the code unit size in bytes — TODO confirm).
TextEncoding TextEncoding::Wrapper<ShiftJIS>::Instance = {
&ShiftJIS::decodePoint,
1,
};

const TextEncoding* encodingFromEnum( TextFormat::Encoding enc ) {
switch ( enc ) {
default:
Expand All @@ -243,7 +292,10 @@ struct TextFileStats {
Uint32 numPlainAscii = 0; // includes whitespace, excludes control characters < 32
Uint32 numWhitespace = 0;
Uint32 numExtended = 0;
Uint32 num16bytes = 0;
float ooNumPoints = 0.f;
float score = 0.f;
bool count16b{ false };

/** @return The number of scanned points that were not counted as valid (numPoints - numValidPoints). */
Uint32 numInvalidPoints() const { return numPoints - numValidPoints; }

Expand All @@ -256,11 +308,15 @@ struct TextFileStats {
}
}

float getScore() const {
return ( 2.5f * numWhitespace + numPlainAscii - 100.f * numInvalidPoints() -
50.f * numControl + 5.f * numExtended ) *
ooNumPoints;
void calcScore() {
if ( !score ) {
score = ( 2.5f * numWhitespace + numPlainAscii - 100.f * numInvalidPoints() -
50.f * numControl + 5.f * numExtended + 2.5f * num16bytes ) *
ooNumPoints;
}
}

float getScore() const { return score; }
};

static Uint32 scanTextFile( TextFileStats& stats, IOStream& ins, const TextEncoding* encoding,
Expand Down Expand Up @@ -314,13 +370,16 @@ static Uint32 scanTextFile( TextFileStats& stats, IOStream& ins, const TextEncod
}
} else if ( decoded.point >= 65536 ) {
stats.numExtended++;
} else if ( stats.count16b && decoded.point >= 0x8140 ) {
stats.num16bytes++;
}
}
prevWasCR = ( decoded.point == '\r' );
}
if ( stats.numPoints > 0 ) {
stats.ooNumPoints = 1.f / stats.numPoints;
}
stats.calcScore();
return numBytes;
}

Expand Down Expand Up @@ -371,6 +430,16 @@ TextFormat guessFileEncoding( IOStream& ins ) {
encoding = TextFormat::Encoding::UTF16BE;
}

TextFileStats statsShiftJIS;
statsShiftJIS.count16b = true;
scanTextFile( statsShiftJIS, ins, TextEncoding::get<ShiftJIS>(), NumBytesForAutodetect );
ins.seek( 0 );

if ( statsShiftJIS.getScore() > stats->getScore() ) {
stats = &statsShiftJIS;
encoding = TextFormat::Encoding::Shift_JIS;
}

// Choose between the UTF16 and 8-bit encoding:
if ( stats8.getScore() >= stats->getScore() ) {
stats = &stats8;
Expand Down Expand Up @@ -448,6 +517,8 @@ TextFormat::Encoding TextFormat::encodingFromString( const std::string_view& str
return TextFormat::Encoding::UTF16BE;
case static_cast<String::HashType>( TextFormat::Encoding::Latin1 ):
return TextFormat::Encoding::Latin1;
case static_cast<String::HashType>( TextFormat::Encoding::Shift_JIS ):
return TextFormat::Encoding::Shift_JIS;
case static_cast<String::HashType>( TextFormat::Encoding::UTF8 ):
default:
return TextFormat::Encoding::UTF8;
Expand All @@ -462,6 +533,8 @@ std::string TextFormat::encodingToString( TextFormat::Encoding enc ) {
return "UTF-16 BE";
case TextFormat::Encoding::Latin1:
return "ISO-8859-1";
case TextFormat::Encoding::Shift_JIS:
return "Shift_JIS";
case TextFormat::Encoding::UTF8:
default:
break;
Expand All @@ -475,6 +548,7 @@ std::vector<std::pair<TextFormat::Encoding, std::string>> TextFormat::encodings(
encs.emplace_back( Encoding::UTF16BE, encodingToString( Encoding::UTF16BE ) );
encs.emplace_back( Encoding::UTF16LE, encodingToString( Encoding::UTF16LE ) );
encs.emplace_back( Encoding::Latin1, encodingToString( Encoding::Latin1 ) );
encs.emplace_back( Encoding::Shift_JIS, encodingToString( Encoding::Shift_JIS ) );
return encs;
}

Expand Down
34 changes: 21 additions & 13 deletions src/eepp/window/backend/SDL2/platformhelpersdl2.cpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#include <eepp/system/log.hpp>
#include <eepp/window/backend/SDL2/base.hpp>
#include <eepp/window/backend/SDL2/platformhelpersdl2.hpp>
#include <eepp/system/log.hpp>

using namespace EE::System;

#if EE_PLATFORM == EE_PLATFORM_EMSCRIPTEN
#include <emscripten.h>
// JavaScript shim: converts the C string to a JS string and passes it to window.open.
// Defined exactly once; a second EM_JS with the same name would be a redefinition.
EM_JS( void, emscripten_open_url, ( const char* msg ),
	   { window.open( UTF8ToString( msg ), 'blank' ); } );
#endif

#if EE_PLATFORM == EE_PLATFORM_ANDROID
Expand All @@ -21,20 +20,29 @@ PlatformHelperSDL2::PlatformHelperSDL2() {}

/**
 * Asks the platform to open @p url.
 * On Emscripten the request is forwarded to the emscripten_open_url shim and always reported as
 * successful. Elsewhere SDL_OpenURL is used when SDL is at least 2.0.14, and a failure is
 * logged together with SDL_GetError(). Older SDL versions cannot open URLs.
 * @param url The URL to open.
 * @return true when the request was issued successfully, false otherwise.
 */
bool PlatformHelperSDL2::openURL( const std::string& url ) {
#if EE_PLATFORM == EE_PLATFORM_EMSCRIPTEN
	emscripten_open_url( url.c_str() );
	return true;
#else
#if SDL_VERSION_ATLEAST( 2, 0, 14 )
	const int res = SDL_OpenURL( url.c_str() );
	if ( res != 0 )
		Log::error( "PlatformHelperSDL2::openURL: Failed with error - %s", SDL_GetError() );
	return res == 0;
#else
	return false;
#endif
#endif
}

/**
 * Converts @p inbuf between encodings by forwarding all arguments to SDL_iconv_string.
 * @param tocode Name of the encoding to convert to.
 * @param fromcode Name of the encoding the input is in.
 * @param inbuf Input bytes to convert.
 * @param inbytesleft Size of the input, in bytes.
 * @return Whatever SDL_iconv_string returns; release it with iconvFree().
 */
char* PlatformHelperSDL2::iconv( const char* tocode, const char* fromcode, const char* inbuf,
size_t inbytesleft ) {
return SDL_iconv_string( tocode, fromcode, inbuf, inbytesleft );
}

/**
 * Releases a buffer obtained from iconv() by passing it to SDL_free.
 * @param buf Buffer to release.
 */
void PlatformHelperSDL2::iconvFree( char* buf ) {
SDL_free( buf );
}

#if EE_PLATFORM == EE_PLATFORM_ANDROID
void* PlatformHelperSDL2::getActivity() {
return SDL_AndroidGetActivity();
Expand Down
4 changes: 4 additions & 0 deletions src/eepp/window/backend/SDL2/platformhelpersdl2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ class EE_API PlatformHelperSDL2 : public PlatformHelper {

bool openURL( const std::string& url );

char* iconv( const char* tocode, const char* fromcode, const char* inbuf, size_t inbytesleft );

void iconvFree( char* buf );

#if EE_PLATFORM == EE_PLATFORM_ANDROID
void* getActivity();

Expand Down

0 comments on commit f8d466d

Please sign in to comment.