JDimproved · ma8ma · Apr 3, 2022 · Apr 3, 2022 · Apr 3, 2022
diff --git a/src/article/drawareabase.cpp b/src/article/drawareabase.cpp
@@ -4104,10 +4104,10 @@ bool DrawAreaBase::set_carets_dclick( CARET_POSITION& caret_left, CARET_POSITION
 
                 int byte_char_pointer;
                 const char32_t uch_pointer = MISC::utf8toutf32( layout->text + pos, byte_char_pointer );
-                const int ucstype_pointer = MISC::get_ucs2mode( uch_pointer );
+                const MISC::UnicodeBlock block_pointer = MISC::get_unicodeblock( uch_pointer );
 #ifdef _DEBUG
                 std::cout << "utf32 = " << std::hex << uch_pointer << std::dec
-                          << " type = " << ucstype_pointer << " pos = " << pos << std::endl;
+                          << " type = " << static_cast<int>( block_pointer ) << " pos = " << pos << std::endl;
 #endif
 
                 // 区切り文字をダブルクリックした
@@ -4124,16 +4124,16 @@ bool DrawAreaBase::set_carets_dclick( CARET_POSITION& caret_left, CARET_POSITION
 
                     int byte_char;
                     const char32_t uch = MISC::utf8toutf32( layout->text + pos_tmp, byte_char );
-                    const int ucstype = MISC::get_ucs2mode( uch );
+                    const MISC::UnicodeBlock block = MISC::get_unicodeblock( uch );
 
                     int byte_char_next;
                     const char32_t uch_next = MISC::utf8toutf32( layout->text + pos_tmp + byte_char, byte_char_next );
-                    const int ucstype_next = MISC::get_ucs2mode( uch_next );
+                    const MISC::UnicodeBlock block_next = MISC::get_unicodeblock( uch_next );
 
                     // 区切り文字が来たら左位置を移動する
                     if( uch_next == '\0' || is_separate_char( uch )
                         // 文字種が変わった
-                        || ( ucstype != ucstype_pointer && ucstype_next == ucstype_pointer )
+                        || ( block != block_pointer && block_next == block_pointer )
 
                         ) pos_left = pos_tmp + byte_char;
 
@@ -4146,11 +4146,11 @@ bool DrawAreaBase::set_carets_dclick( CARET_POSITION& caret_left, CARET_POSITION
 
                     int byte_char;
                     const char32_t uch = MISC::utf8toutf32( layout->text + pos_right, byte_char );
-                    const int ucstype = MISC::get_ucs2mode( uch );
+                    const MISC::UnicodeBlock block = MISC::get_unicodeblock( uch );
 
                     int byte_char_next;
                     const char32_t uch_next = MISC::utf8toutf32( layout->text + pos_right + byte_char, byte_char_next );
-                    const int ucstype_next = MISC::get_ucs2mode( uch_next );
+                    const MISC::UnicodeBlock block_next = MISC::get_unicodeblock( uch_next );
 
                     // 区切り文字が来たらbreak
                     if( is_separate_char( uch ) ) break;
@@ -4159,7 +4159,7 @@ bool DrawAreaBase::set_carets_dclick( CARET_POSITION& caret_left, CARET_POSITION
 
                     // 文字種が変わった
                     if( uch_next == '\0'
-                        || ( ucstype == ucstype_pointer && ucstype_next != ucstype_pointer )
+                        || ( block == block_pointer && block_next != block_pointer )
                         ) break;
                 }
 

diff --git a/src/jdlib/misccharcode.cpp b/src/jdlib/misccharcode.cpp
@@ -353,3 +353,18 @@ char32_t MISC::utf8toutf32( const char* utf8str, int& byte )
 
     return unich;
 }
+
+
+/** @brief Classify a Unicode code point into one of the blocks this module distinguishes.
+ * Only Basic Latin, Hiragana and Katakana are recognized; every other value maps to Other.
+ *
+ * @param[in] unich Unicode code point
+ * @return MISC::UnicodeBlock enumerator for the block containing unich
+ */
+MISC::UnicodeBlock MISC::get_unicodeblock( const char32_t unich )
+{
+    if( unich <= 0x007F ) return UnicodeBlock::BasicLatin;
+    if( unich >= 0x3040 && unich <= 0x309F ) return UnicodeBlock::Hira;
+    if( unich >= 0x30A0 && unich <= 0x30FF ) return UnicodeBlock::Kata;
+
+    return UnicodeBlock::Other;
+}
diff --git a/src/jdlib/misccharcode.h b/src/jdlib/misccharcode.h
@@ -19,6 +19,16 @@ namespace MISC
         CHARCODE_UTF
     };
 
+    /// @brief Return value of get_unicodeblock()
+    enum class UnicodeBlock
+    {
+        BasicLatin, ///< Basic Latin [U+0000, U+007F]
+        Hira, ///< Hiragana [U+3040, U+309F]
+        Kata, ///< Katakana [U+30A0, U+30FF]
+
+        Other, ///< anything not listed above
+    };
+
     bool is_euc( const char* input, size_t read_byte );
     bool is_jis( const char* input, size_t& read_byte );
     bool is_sjis( const char* input, size_t read_byte );
@@ -33,6 +43,9 @@ namespace MISC
     // 出力 :  byte  長さ(バイト) utf8str が ASCII なら 1, UTF-8 なら 2 or 3 or 4, それ以外は 0 を入れて返す
     // 戻り値 : unicode code point
     char32_t utf8toutf32( const char* utf8str, int& byte );
+
+    /// Classify a Unicode code point into one of the UnicodeBlock categories
+    UnicodeBlock get_unicodeblock( const char32_t unich );
 }
 
 #endif
diff --git a/src/jdlib/miscutil.cpp b/src/jdlib/miscutil.cpp
@@ -1579,18 +1579,6 @@ int MISC::ucs2toutf8( const int ucs2,  char* utfstr )
 }
 
 
-//
-// ucs2 の種類
-//
-int MISC::get_ucs2mode( const int ucs2 )
-{
-    if( ucs2 >= 0x0000 && ucs2 <= 0x007f ) return UCS2MODE_BASIC_LATIN;
-    if( ucs2 >= 0x3040 && ucs2 <= 0x309f ) return UCS2MODE_HIRA;
-    if( ucs2 >= 0x30a0 && ucs2 <= 0x30ff ) return UCS2MODE_KATA;
-
-    return UCS2MODE_OTHER;
-}
-
 //
 // WAVEDASHなどのWindows系UTF-8文字をUnix系文字と相互変換
 //

diff --git a/src/jdlib/miscutil.h b/src/jdlib/miscutil.h
@@ -28,16 +28,6 @@ namespace MISC
                 SCHEME_SSSP
 	};
 
-     // get_ucs2mode()の戻り値
-     enum
-     {
-         UCS2MODE_BASIC_LATIN = 0,
-         UCS2MODE_HIRA,
-         UCS2MODE_KATA,
-
-         UCS2MODE_OTHER
-     };
-
 
      // utf8_fix_wavedash のモード
      enum
@@ -231,9 +221,6 @@ namespace MISC
     // str に含まれる「&#数字;」形式の数字参照文字列を全てユニーコード文字に変換する
     std::string decode_spchar_number( const std::string& str );
 
-    // ucs2 の種類
-    int get_ucs2mode( const int ucs2 );
-
     // ucs2 -> utf8 変換
     // 出力 : utfstr 変換後の文字
     // 戻り値 : バイト数

diff --git a/src/skeleton/editview.cpp b/src/skeleton/editview.cpp
@@ -14,6 +14,7 @@
 #include "environment.h"
 #include "session.h"
 
+#include "jdlib/misccharcode.h"
 #include "jdlib/miscutil.h"
 #include "config/globalconf.h"
 
@@ -733,10 +734,10 @@ static gboolean EditTextView_slot_extend_selection( GtkTextView*,
     Gtk::TextIter& end_iter = Glib::wrap( end );
 
     if( granularity == GTK_TEXT_EXTEND_SELECTION_WORD ) {
-        const auto mode = MISC::get_ucs2mode( loc_char );
+        const auto block = MISC::get_unicodeblock( loc_char );
         const bool sep = is_separate_char( loc_char );
-        const auto find_char = [mode, sep]( char32_t c ) {
-            return mode != MISC::get_ucs2mode( c ) || sep != is_separate_char( c );
+        const auto find_char = [block, sep]( char32_t c ) {
+            return block != MISC::get_unicodeblock( c ) || sep != is_separate_char( c );
         };
         if( start_iter.backward_find_char( find_char ) ) {
             start_iter.forward_char();

diff --git a/test/gtest_jdlib_misccharcode.cpp b/test/gtest_jdlib_misccharcode.cpp
@@ -164,4 +164,35 @@ TEST_F(Utf8ToUtf32Test, invalid_bytes)
     EXPECT_EQ( 0, byte );
 }
 
+class GetUnicodeBlockTest : public ::testing::Test {}; // fixture for the MISC::get_unicodeblock() tests
+
+TEST_F(GetUnicodeBlockTest, basic_latin) // lowest and highest value classified as BasicLatin
+{
+    EXPECT_EQ( MISC::UnicodeBlock::BasicLatin, MISC::get_unicodeblock( 0x0000 ) );
+    EXPECT_EQ( MISC::UnicodeBlock::BasicLatin, MISC::get_unicodeblock( 0x007F ) );
+}
+
+TEST_F(GetUnicodeBlockTest, hiragana) // lowest and highest value classified as Hira
+{
+    EXPECT_EQ( MISC::UnicodeBlock::Hira, MISC::get_unicodeblock( 0x3040 ) );
+    EXPECT_EQ( MISC::UnicodeBlock::Hira, MISC::get_unicodeblock( 0x309F ) );
+}
+
+TEST_F(GetUnicodeBlockTest, katakana) // lowest and highest value classified as Kata
+{
+    EXPECT_EQ( MISC::UnicodeBlock::Kata, MISC::get_unicodeblock( 0x30A0 ) );
+    EXPECT_EQ( MISC::UnicodeBlock::Kata, MISC::get_unicodeblock( 0x30FF ) );
+}
+
+TEST_F(GetUnicodeBlockTest, other) // values just outside each recognized range must map to Other
+{
+    EXPECT_EQ( MISC::UnicodeBlock::Other, MISC::get_unicodeblock( 0x0080 ) ); // one past the BasicLatin upper bound
+
+    EXPECT_EQ( MISC::UnicodeBlock::Other, MISC::get_unicodeblock( 0x303F ) ); // one below the Hira lower bound
+    EXPECT_EQ( MISC::UnicodeBlock::Other, MISC::get_unicodeblock( 0x3100 ) ); // one past the Kata upper bound
+
+    EXPECT_EQ( MISC::UnicodeBlock::Other, MISC::get_unicodeblock( 0x10FFFF ) ); // large value far above every recognized range
+    EXPECT_EQ( MISC::UnicodeBlock::Other, MISC::get_unicodeblock( 0x110000 ) ); // the next value up is also not rejected, just Other
+}
+
 } // namespace