Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update function parameters for MISC::asc() #57

Merged
merged 1 commit into from Mar 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 18 additions & 15 deletions src/jdlib/jdregex.cpp
Expand Up @@ -14,16 +14,14 @@

enum
{
MAX_TARGET_SIZE = 64 * 2048, // 全角半角変換のバッファサイズ
MAX_TARGET_SIZE = 64 * 1024, // 全角半角変換のバッファサイズ
REGEX_MAX_NMATCH = 32
};

using namespace JDLIB;

Regex::Regex()
: m_compiled(false),
m_target_asc( NULL ),
m_table_pos( NULL )
: m_compiled(false)
{
m_results.clear();
m_pos.clear();
Expand All @@ -33,9 +31,6 @@ Regex::Regex()
Regex::~Regex()
{
dispose();

if( m_target_asc ) free( m_target_asc );
if( m_table_pos ) free( m_table_pos );
}


Expand Down Expand Up @@ -82,11 +77,15 @@ bool Regex::compile( const std::string reg, const bool icase, const bool newline
// 全角英数字 → 半角英数字、半角カナ → 全角カナ
if( m_wchar && MISC::has_widechar( asc_reg ) ){

if( ! m_target_asc ) m_target_asc = ( char* )malloc( MAX_TARGET_SIZE );
if( ! m_table_pos ) m_table_pos = ( int* )malloc( sizeof( int ) * MAX_TARGET_SIZE );
m_target_asc.clear();
m_table_pos.clear();
if( m_target_asc.capacity() < MAX_TARGET_SIZE ) {
m_target_asc.reserve( MAX_TARGET_SIZE );
m_table_pos.reserve( MAX_TARGET_SIZE );
}

MISC::asc( asc_reg, m_target_asc, m_table_pos, MAX_TARGET_SIZE );
asc_reg = m_target_asc;
MISC::asc( asc_reg, m_target_asc, m_table_pos );
asc_reg = m_target_asc.c_str();

#ifdef _DEBUG
std::cout << m_target_asc << std::endl;
Expand Down Expand Up @@ -148,12 +147,16 @@ bool Regex::exec( const std::string& target, const size_t offset )
std::cout << target << std::endl;
#endif

if( ! m_target_asc ) m_target_asc = ( char* )malloc( MAX_TARGET_SIZE );
if( ! m_table_pos ) m_table_pos = ( int* )malloc( sizeof( int ) * MAX_TARGET_SIZE );
m_target_asc.clear();
m_table_pos.clear();
if( m_target_asc.capacity() < MAX_TARGET_SIZE ) {
m_target_asc.reserve( MAX_TARGET_SIZE );
m_table_pos.reserve( MAX_TARGET_SIZE );
}

MISC::asc( asc_target, m_target_asc, m_table_pos, MAX_TARGET_SIZE );
MISC::asc( asc_target, m_target_asc, m_table_pos );
exec_asc = true;
asc_target = m_target_asc;
asc_target = m_target_asc.c_str();

#ifdef _DEBUG
std::cout << m_target_asc << std::endl;
Expand Down
6 changes: 4 additions & 2 deletions src/jdlib/jdregex.h
Expand Up @@ -30,8 +30,10 @@ namespace JDLIB
bool m_newline;
bool m_wchar;

char *m_target_asc;
int *m_table_pos;
// 全角半角を区別しないときに使う変換用バッファ
// 処理可能なバッファ長は regoff_t (= int) のサイズに制限される
std::string m_target_asc;
std::vector< int > m_table_pos;

public:

Expand Down
85 changes: 28 additions & 57 deletions src/jdlib/miscutil.cpp
Expand Up @@ -1734,59 +1734,44 @@ bool MISC::has_widechar( const char* str )
// 全角英数字(str1) -> 半角英数字(str2)
//
// table_pos : 置き換えた文字列の位置
// n : str2 と table_pos のバッファサイズ
//
void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n )
void MISC::asc( const char* str1, std::string& str2, std::vector< int >& table_pos )
{
const size_t mrg = 18;
size_t pos = 0;
size_t pos2 = 0;
for( int pos = 0; str1[ pos ] != '\0'; ) {
assert( pos >= 0 );
assert( table_pos.max_size() > table_pos.size() );
const auto in1 = static_cast< unsigned char >( str1[ pos ] );

while( pos2 < ( n - mrg ) && *( str1 + pos ) != '\0' ){

const unsigned char in = *( str1 + pos );

if( in == 0xef ){

const unsigned char in2 = * ( str1 + pos + 1 );
const unsigned char in3 = * ( str1 + pos + 2 );
if( in1 == 0xef ) {
const auto in2 = static_cast< unsigned char >( str1[ pos + 1 ] );
const auto in3 = static_cast< unsigned char >( str1[ pos + 2 ] );

if( in2 == 0xbc ){

// 全角数字
// 全角数字 (U+FF10 - U+FF19)
if( 0x90 <= in3 && in3 <= 0x99 ){

str2[ pos2 ] = '0' + in3 - 0x90;;
table_pos[ pos2 ] = pos;
str2.push_back( '0' + in3 - 0x90 );
table_pos.push_back( pos );
pos += 3;
++pos2;
continue;
}

// 全角大文字
// 全角大文字 (U+FF21 - U+FF3A)
else if( 0xa1 <= in3 && in3 <= 0xba ){

str2[ pos2 ] = 'A' + in3 - 0xa1;
table_pos[ pos2 ] = pos;

str2.push_back( 'A' + in3 - 0xa1 );
table_pos.push_back( pos );
pos += 3;
++pos2;
continue;
}
}

// 全角小文字
// 全角小文字 (U+FF41 - U+FF5A)
else if( in2 == 0xbd && ( 0x81 <= in3 && in3 <= 0x9a ) ){

str2[ pos2 ] = 'a' + in3 - 0x81;
table_pos[ pos2 ] = pos;

str2.push_back( 'a' + in3 - 0x81 );
table_pos.push_back( pos );
pos += 3;
++pos2;
continue;
}

// 半角かな
// 半角かな (U+FF61 - U+FF9F)
else if( ( in2 == 0xbd && ( 0xa1 <= in3 && in3 <= 0xbf ) )
|| ( in2 == 0xbe && ( 0x80 <= in3 && in3 <= 0x9f ) ) ){

Expand All @@ -1795,18 +1780,16 @@ void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n )
size_t i = 0;

// 濁点、半濁点
const unsigned char in4 = * ( str1 + pos + 3 );
const unsigned char in5 = * ( str1 + pos + 4 );
const auto in4 = static_cast< unsigned char >( str1[ pos + 3 ] );
const auto in5 = static_cast< unsigned char >( str1[ pos + 4 ] );
if( in4 == 0xef && in5 == 0xbe ){

const unsigned char in6 = * ( str1 + pos + 5 );
const auto in6 = static_cast< unsigned char >( str1[ pos + 5 ] );

// 濁点
if( in6 == 0x9e ){
dakuten = true;
i = 61;
}

// 半濁点
else if( in6 == 0x9f ){
dakuten = true;
Expand All @@ -1816,16 +1799,14 @@ void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n )

while( !flag_hkana && hkana_table1[ i ][ 0 ][ 0 ] != '\0' ){

if( in == hkana_table1[ i ][ 0 ][ 0 ] && in2 == hkana_table1[ i ][ 0 ][ 1 ] && in3 == hkana_table1[ i ][ 0 ][ 2 ] ){
if( in1 == hkana_table1[ i ][ 0 ][ 0 ]
&& in2 == hkana_table1[ i ][ 0 ][ 1 ]
&& in3 == hkana_table1[ i ][ 0 ][ 2 ] ) {

str2[ pos2 ] = hkana_table1[ i ][ 1 ][ 0 ];
str2[ pos2 +1 ] = hkana_table1[ i ][ 1 ][ 1 ];
str2[ pos2 +2 ] = hkana_table1[ i ][ 1 ][ 2 ];
table_pos[ pos2 ] = pos;
std::copy_n( hkana_table1[ i ][ 1 ], 3, std::back_inserter( str2 ) );
std::generate_n( std::back_inserter( table_pos ), 3, [&pos]{ return pos++; } );

pos += 3;
if( dakuten ) pos += 3;
pos2 += 3;
flag_hkana = true;
}
++i;
Expand All @@ -1834,18 +1815,8 @@ void MISC::asc( const char* str1, char* str2, int* table_pos, const size_t n )
}
}

str2[ pos2 ] = str1[ pos ];
table_pos[ pos2 ] = pos;

str2.push_back( str1[ pos ] );
table_pos.push_back( pos );
++pos;
++pos2;
}

if( pos2 >= ( n - mrg ) ){
ERRMSG( "MISC::asc : buffer overflow." );
pos2 = ( n - mrg ) - 1;
}

table_pos[ pos2 ] = pos;
str2[ pos2 ] = '\0';
}
3 changes: 1 addition & 2 deletions src/jdlib/miscutil.h
Expand Up @@ -265,8 +265,7 @@ namespace MISC

// 全角英数字(str1) -> 半角英数字(str2)
// table_pos : 置き換えた文字列の位置
// n : str2 と table_pos のバッファサイズ
void asc( const char* str1, char* str2, int* table_pos, const size_t n );
void asc( const char* str1, std::string& str2, std::vector< int >& table_pos );


// URL中のスキームを判別する
Expand Down