From 4bad1d51f952fccc91bcee53c20fcc3d9a5825ba Mon Sep 17 00:00:00 2001 From: Masayuki Yamamoto Date: Tue, 5 Mar 2019 22:47:49 +0900 Subject: [PATCH] Update function parameters for MISC::asc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 変換したデータを書き込むバッファをポインター渡しから可変長配列の 参照渡しに変更する。渡された配列は初期化せずデータを追加していく。 配列のメモリ再確保や再配置を避けるには予め領域を予約して渡す必要がある。 --- src/jdlib/jdregex.cpp | 33 ++++++++-------- src/jdlib/jdregex.h | 6 ++- src/jdlib/miscutil.cpp | 85 ++++++++++++++---------------------------- src/jdlib/miscutil.h | 3 +- 4 files changed, 51 insertions(+), 76 deletions(-) diff --git a/src/jdlib/jdregex.cpp b/src/jdlib/jdregex.cpp index 7e5daf7e4..760ea48da 100644 --- a/src/jdlib/jdregex.cpp +++ b/src/jdlib/jdregex.cpp @@ -14,16 +14,14 @@ enum { - MAX_TARGET_SIZE = 64 * 2048, // 全角半角変換のバッファサイズ + MAX_TARGET_SIZE = 64 * 1024, // 全角半角変換のバッファサイズ REGEX_MAX_NMATCH = 32 }; using namespace JDLIB; Regex::Regex() - : m_compiled(false), - m_target_asc( NULL ), - m_table_pos( NULL ) + : m_compiled(false) { m_results.clear(); m_pos.clear(); @@ -33,9 +31,6 @@ Regex::Regex() Regex::~Regex() { dispose(); - - if( m_target_asc ) free( m_target_asc ); - if( m_table_pos ) free( m_table_pos ); } @@ -82,11 +77,15 @@ bool Regex::compile( const std::string reg, const bool icase, const bool newline // 全角英数字 → 半角英数字、半角カナ → 全角カナ if( m_wchar && MISC::has_widechar( asc_reg ) ){ - if( ! m_target_asc ) m_target_asc = ( char* )malloc( MAX_TARGET_SIZE ); - if( ! m_table_pos ) m_table_pos = ( int* )malloc( sizeof( int ) * MAX_TARGET_SIZE ); + m_target_asc.clear(); + m_table_pos.clear(); + if( m_target_asc.capacity() < MAX_TARGET_SIZE ) { + m_target_asc.reserve( MAX_TARGET_SIZE ); + m_table_pos.reserve( MAX_TARGET_SIZE ); + } - MISC::asc( asc_reg, m_target_asc, m_table_pos, MAX_TARGET_SIZE ); - asc_reg = m_target_asc; + MISC::asc( asc_reg, m_target_asc, m_table_pos ); + asc_reg = m_target_asc.c_str(); #ifdef _DEBUG std::cout << m_target_asc << std::endl; @@ -148,12 +147,16 @@ bool Regex::exec( const std::string& target, const size_t offset ) std::cout << target << std::endl; #endif - if( ! m_target_asc ) m_target_asc = ( char* )malloc( MAX_TARGET_SIZE ); - if( ! m_table_pos ) m_table_pos = ( int* )malloc( sizeof( int ) * MAX_TARGET_SIZE ); + m_target_asc.clear(); + m_table_pos.clear(); + if( m_target_asc.capacity() < MAX_TARGET_SIZE ) { + m_target_asc.reserve( MAX_TARGET_SIZE ); + m_table_pos.reserve( MAX_TARGET_SIZE ); + } - MISC::asc( asc_target, m_target_asc, m_table_pos, MAX_TARGET_SIZE ); + MISC::asc( asc_target, m_target_asc, m_table_pos ); exec_asc = true; - asc_target = m_target_asc; + asc_target = m_target_asc.c_str(); #ifdef _DEBUG std::cout << m_target_asc << std::endl; diff --git a/src/jdlib/jdregex.h b/src/jdlib/jdregex.h index 6a5f57749..a192bb56b 100644 --- a/src/jdlib/jdregex.h +++ b/src/jdlib/jdregex.h @@ -30,8 +30,10 @@ namespace JDLIB bool m_newline; bool m_wchar; - char *m_target_asc; - int *m_table_pos; + // 全角半角を区別しないときに使う変換用バッファ + // 処理可能なバッファ長は regoff_t (= int) のサイズに制限される + std::string m_target_asc; + std::vector< int > m_table_pos; public: diff --git a/src/jdlib/miscutil.cpp b/src/jdlib/miscutil.cpp index 242a9b73b..7582dad92 100644 --- a/src/jdlib/miscutil.cpp +++ b/src/jdlib/miscutil.cpp @@ -1734,59 +1734,44 @@ bool MISC::has_widechar( const char* str ) // 全角英数字(str1) -> 半角英数字(str2) // // table_pos : 置き換えた文字列の位置 -// n : str2 と table_pos のバッファサイズ // -void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n ) +void MISC::asc( const char* str1, std::string& str2, std::vector< int >& table_pos ) { - const size_t mrg = 18; - size_t pos = 0; - size_t pos2 = 0; + for( int pos = 0; str1[ pos ] != '\0'; ) { + assert( pos >= 0 ); + assert( table_pos.max_size() > table_pos.size() ); + const auto in1 = static_cast< unsigned char >( str1[ pos ] ); - while( pos2 < ( n - mrg ) && *( str1 + pos ) != '\0' ){ - - const unsigned char in = *( str1 + pos ); - - if( in == 0xef ){ - - const unsigned char in2 = * ( str1 + pos + 1 ); - const unsigned char in3 = * ( str1 + pos + 2 ); + if( in1 == 0xef ) { + const auto in2 = static_cast< unsigned char >( str1[ pos + 1 ] ); + const auto in3 = static_cast< unsigned char >( str1[ pos + 2 ] ); if( in2 == 0xbc ){ - - // 全角数字 + // 全角数字 (U+FF10 - U+FF19) if( 0x90 <= in3 && in3 <= 0x99 ){ - - str2[ pos2 ] = '0' + in3 - 0x90;; - table_pos[ pos2 ] = pos; + str2.push_back( '0' + in3 - 0x90 ); + table_pos.push_back( pos ); pos += 3; - ++pos2; continue; } - - // 全角大文字 + // 全角大文字 (U+FF21 - U+FF3A) else if( 0xa1 <= in3 && in3 <= 0xba ){ - - str2[ pos2 ] = 'A' + in3 - 0xa1; - table_pos[ pos2 ] = pos; - + str2.push_back( 'A' + in3 - 0xa1 ); + table_pos.push_back( pos ); pos += 3; - ++pos2; continue; } } - // 全角小文字 + // 全角小文字 (U+FF41 - U+FF5A) else if( in2 == 0xbd && ( 0x81 <= in3 && in3 <= 0x9a ) ){ - - str2[ pos2 ] = 'a' + in3 - 0x81; - table_pos[ pos2 ] = pos; - + str2.push_back( 'a' + in3 - 0x81 ); + table_pos.push_back( pos ); pos += 3; - ++pos2; continue; } - // 半角かな + // 半角かな (U+FF61 - U+FF9F) else if( ( in2 == 0xbd && ( 0xa1 <= in3 && in3 <= 0xbf ) ) || ( in2 == 0xbe && ( 0x80 <= in3 && in3 <= 0x9f ) ) ){ @@ -1795,18 +1780,16 @@ void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n ) size_t i = 0; // 濁点、半濁点 - const unsigned char in4 = * ( str1 + pos + 3 ); - const unsigned char in5 = * ( str1 + pos + 4 ); + const auto in4 = static_cast< unsigned char >( str1[ pos + 3 ] ); + const auto in5 = static_cast< unsigned char >( str1[ pos + 4 ] ); if( in4 == 0xef && in5 == 0xbe ){ - - const unsigned char in6 = * ( str1 + pos + 5 ); + const auto in6 = static_cast< unsigned char >( str1[ pos + 5 ] ); // 濁点 if( in6 == 0x9e ){ dakuten = true; i = 61; } - // 半濁点 else if( in6 == 0x9f ){ dakuten = true; @@ -1816,16 +1799,14 @@ void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n ) while( !flag_hkana && hkana_table1[ i ][ 0 ][ 0 ] != '\0' ){ - if( in == hkana_table1[ i ][ 0 ][ 0 ] && in2 == hkana_table1[ i ][ 0 ][ 1 ] && in3 == hkana_table1[ i ][ 0 ][ 2 ] ){ + if( in1 == hkana_table1[ i ][ 0 ][ 0 ] + && in2 == hkana_table1[ i ][ 0 ][ 1 ] + && in3 == hkana_table1[ i ][ 0 ][ 2 ] ) { - str2[ pos2 ] = hkana_table1[ i ][ 1 ][ 0 ]; - str2[ pos2 +1 ] = hkana_table1[ i ][ 1 ][ 1 ]; - str2[ pos2 +2 ] = hkana_table1[ i ][ 1 ][ 2 ]; - table_pos[ pos2 ] = pos; + std::copy_n( hkana_table1[ i ][ 1 ], 3, std::back_inserter( str2 ) ); + std::generate_n( std::back_inserter( table_pos ), 3, [&pos]{ return pos++; } ); - pos += 3; if( dakuten ) pos += 3; - pos2 += 3; flag_hkana = true; } ++i; @@ -1834,18 +1815,8 @@ void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n ) } } - str2[ pos2 ] = str1[ pos ]; - table_pos[ pos2 ] = pos; - + str2.push_back( str1[ pos ] ); + table_pos.push_back( pos ); ++pos; - ++pos2; - } - - if( pos2 >= ( n - mrg ) ){ - ERRMSG( "MISC::asc : buffer overflow." ); - pos2 = ( n - mrg ) - 1; } - - table_pos[ pos2 ] = pos; - str2[ pos2 ] = '\0'; } diff --git a/src/jdlib/miscutil.h b/src/jdlib/miscutil.h index 11eb1f51a..f3489043b 100644 --- a/src/jdlib/miscutil.h +++ b/src/jdlib/miscutil.h @@ -265,8 +265,7 @@ namespace MISC // 全角英数字(str1) -> 半角英数字(str2) // table_pos : 置き換えた文字列の位置 - // n : str2 と table_pos のバッファサイズ - void asc( const char* str1, char* str2, int* table_pos, const size_t n ); + void asc( const char* str1, std::string& str2, std::vector< int >& table_pos ); // URL中のスキームを判別する