Skip to content

Commit

Permalink
Fixed CORE-6542 - Implementation of SUBSTRING for UTF8 character set …
Browse files Browse the repository at this point in the history
…is inefficient.
  • Loading branch information
asfernandes committed Apr 16, 2021
1 parent ba23ae5 commit ac2532f
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
45 changes: 45 additions & 0 deletions src/common/IntlUtil.cpp
Expand Up @@ -32,6 +32,7 @@
#include "../intl/country_codes.h"
#include "../common/classes/auto.h"
#include "../common/classes/Aligner.h"
#include <unicode/utf8.h>


using Jrd::UnicodeUtil;
Expand Down Expand Up @@ -413,6 +414,49 @@ INTL_BOOL IntlUtil::utf8WellFormed(charset* cs, ULONG len, const UCHAR* str, ULO
}


// Extracts a substring, measured in characters, directly from a UTF-8 encoded
// buffer, walking the bytes once instead of converting to another encoding.
//
// Parameters:
//   cs       - charset descriptor; not used by this implementation.
//   srcLen   - length of src in bytes.
//   src      - source UTF-8 string.
//   dstLen   - capacity of dst in bytes.
//   dst      - destination buffer that receives the raw bytes of the substring.
//   startPos - zero-based index of the first character to copy.
//   length   - maximum number of characters to copy.
//
// Returns the number of bytes written to dst. Returns 0 when src ends before
// startPos characters could be skipped. Returns INTL_BAD_STR_LENGTH when the
// scanned part of src contains an ill-formed UTF-8 sequence or when the result
// does not fit in dstLen bytes.
ULONG IntlUtil::utf8SubString(charset* cs, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst,
	ULONG startPos, ULONG length)
{
	ULONG pos = 0;	// current byte offset into src
	UChar32 c;

	// Skip the characters that precede the requested start position.
	for (ULONG skipped = 0; skipped < startPos; ++skipped)
	{
		if (pos >= srcLen)
			return 0;

		U8_NEXT(src, pos, srcLen, c);

		// U8_NEXT reports an ill-formed sequence with a negative code point.
		if (c < 0)
			return INTL_BAD_STR_LENGTH;
	}

	const ULONG copyOffset = pos;

	// Advance over at most `length` characters. Counting down the remaining
	// characters (instead of comparing against startPos + length) avoids an
	// unsigned wrap-around when the caller passes a very large length.
	for (ULONG remaining = length; remaining > 0 && pos < srcLen; --remaining)
	{
		U8_NEXT(src, pos, srcLen, c);

		if (c < 0)
			return INTL_BAD_STR_LENGTH;
	}

	const ULONG size = pos - copyOffset;

	fb_assert(size <= dstLen);
	if (size > dstLen)
		return INTL_BAD_STR_LENGTH;

	memcpy(dst, src + copyOffset, size);
	return size;
}


void IntlUtil::initAsciiCharset(charset* cs)
{
initNarrowCharset(cs, "ASCII");
Expand All @@ -428,6 +472,7 @@ void IntlUtil::initUtf8Charset(charset* cs)
initNarrowCharset(cs, "UTF8");
cs->charset_max_bytes_per_char = 4;
cs->charset_fn_well_formed = utf8WellFormed;
cs->charset_fn_substring = utf8SubString;

initConvert(&cs->charset_to_unicode, cvtUtf8ToUtf16);
initConvert(&cs->charset_from_unicode, cvtUtf16ToUtf8);
Expand Down
3 changes: 3 additions & 0 deletions src/common/IntlUtil.h
Expand Up @@ -73,6 +73,9 @@ class IntlUtil
static INTL_BOOL asciiWellFormed(charset* cs, ULONG len, const UCHAR* str, ULONG* offendingPos);
static INTL_BOOL utf8WellFormed(charset* cs, ULONG len, const UCHAR* str, ULONG* offendingPos);

// Copies up to `length` characters of the UTF-8 string src (srcLen bytes),
// starting at the zero-based character index startPos, into dst (capacity
// dstLen bytes). Returns the number of bytes copied, or INTL_BAD_STR_LENGTH
// if an ill-formed sequence is met or the result does not fit in dst.
static ULONG utf8SubString(charset* cs, ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* dst,
ULONG startPos, ULONG length);

static void initAsciiCharset(charset* cs);
static void initUtf8Charset(charset* cs);
static void initConvert(csconvert* cvt, pfn_INTL_convert func);
Expand Down

0 comments on commit ac2532f

Please sign in to comment.