Skip to content

Commit

Permalink
Backport improvement #7038 - Improve performance of STARTING WITH wit…
Browse files Browse the repository at this point in the history
…h insensitive collations.
  • Loading branch information
asfernandes committed Jun 23, 2022
1 parent 9d47552 commit 05f0cb2
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 5 deletions.
5 changes: 3 additions & 2 deletions src/common/TextType.cpp
Expand Up @@ -364,12 +364,13 @@ ULONG TextType::canonical(ULONG srcLen, const UCHAR* src, ULONG dstLen, UCHAR* d

ULONG utf16_len = getCharSet()->getConvToUnicode().convertLength(srcLen);

ULONG errPos;

// convert to UTF-16
utf16_len = getCharSet()->getConvToUnicode().convert(srcLen, src,
utf16_len, utf16_str.getBuffer(utf16_len));
utf16_len, utf16_str.getBuffer(utf16_len), &errPos);

USHORT errCode;
ULONG errPos;

// convert UTF-16 to UTF-32
return UnicodeUtil::utf16ToUtf32(utf16_len, Firebird::Aligner<USHORT>(utf16_str.begin(), utf16_len),
Expand Down
2 changes: 2 additions & 0 deletions src/common/intlobj_new.h
Expand Up @@ -104,6 +104,8 @@ typedef ULONG (*pfn_INTL_str2case) (

/*
Places exactly texttype_canonical_width number of bytes into dst for each character from src.
The src (srcLen) string may be truncated (it may end in the middle of a character). In that case
this function must treat it as valid and process only the characters that were read completely.
Returns INTL_BAD_STR_LENGTH in case of error or number of characters processed if successful.
*/
typedef ULONG (*pfn_INTL_canonical) (
Expand Down
36 changes: 33 additions & 3 deletions src/jrd/Collation.cpp
Expand Up @@ -444,15 +444,21 @@ template <typename CharType, typename StrConverter>
class StartsMatcher : public PatternMatcher
{
public:
// Constructs a matcher for the pattern str (str_len units of CharType).
// aByteLengthLimit is the base value from which byteLengthLimit is derived; process() never
// accepts more than byteLengthLimit bytes in total.
// NOTE(review): the two signature lines below are the pre-change and post-change versions as
// rendered by the diff view; only the second one carries aByteLengthLimit.
StartsMatcher(MemoryPool& pool, TextType* ttype, const CharType* str, SLONG str_len)
StartsMatcher(MemoryPool& pool, TextType* ttype, const CharType* str, SLONG str_len, SLONG aByteLengthLimit)
: PatternMatcher(pool, ttype),
evaluator(pool, str, str_len)
{
auto charSet = ttype->getCharSet();

// Multi-byte character sets: rescale the limit by dividing by the minimum bytes per character
// and multiplying by the maximum bytes per character (presumably the largest byte count that
// the same number of characters could occupy - confirm). Otherwise the limit is used as given.
byteLengthLimit = charSet->isMultiByte() ?
aByteLengthLimit / charSet->minBytesPerChar() * charSet->maxBytesPerChar() :
aByteLengthLimit;
}

// Clears the count of bytes consumed by process() and resets the underlying evaluator.
void reset()
{
	processedByteLength = 0;
	evaluator.reset();
}

bool result()
Expand All @@ -462,36 +468,60 @@ class StartsMatcher : public PatternMatcher

bool process(const UCHAR* str, SLONG length)
{
if (processedByteLength + length > byteLengthLimit)
length = byteLengthLimit - processedByteLength;

processedByteLength += length;

StrConverter cvt(pool, textType, str, length);
fb_assert(length % sizeof(CharType) == 0);

return evaluator.processNextChunk(
reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
}

// Factory: allocates a StartsMatcher from pool for the pattern str (length bytes).
// NOTE(review): the two argument lines of the FB_NEW_POOL call below are the pre-change and
// post-change versions as rendered by the diff view; only the second passes byteLengthLimit.
static StartsMatcher* create(MemoryPool& pool, TextType* ttype,
const UCHAR* str, SLONG length)
{
// Captured before StrConverter is constructed, i.e. the byte length exactly as passed in.
const auto byteLengthLimit = length;

// NOTE(review): StrConverter presumably adjusts str/length in place, since both are used
// again below - confirm against the converter classes.
StrConverter cvt(pool, ttype, str, length);
fb_assert(length % sizeof(CharType) == 0);

return FB_NEW_POOL(pool) StartsMatcher(pool, ttype,
reinterpret_cast<const CharType*>(str), length / sizeof(CharType));
reinterpret_cast<const CharType*>(str), length / sizeof(CharType), byteLengthLimit);
}

// One-shot check: builds a StartsEvaluator for pattern p (pl bytes), feeds it string s
// (sl bytes) as a single chunk and returns the evaluator's result.
// NOTE(review): the cvt2 declaration appears twice below because the diff view shows both the
// removed and the added position of that line.
static bool evaluate(MemoryPool& pool, TextType* ttype, const UCHAR* s, SLONG sl,
const UCHAR* p, SLONG pl)
{
// When the string is longer than the pattern, shorten sl before it reaches the converter.
if (sl > pl)
{
auto charSet = ttype->getCharSet();

// Multi-byte: cap at pl / minBytesPerChar * maxBytesPerChar, but never above the original sl.
// Otherwise: cap at pl.
sl = charSet->isMultiByte() ?
MIN(sl, pl / charSet->minBytesPerChar() * charSet->maxBytesPerChar()) :
pl;
}

StrConverter cvt1(pool, ttype, p, pl);
StrConverter cvt2(pool, ttype, s, sl);
fb_assert(pl % sizeof(CharType) == 0);

StrConverter cvt2(pool, ttype, s, sl);
fb_assert(sl % sizeof(CharType) == 0);

Firebird::StartsEvaluator<CharType> evaluator(pool,
reinterpret_cast<const CharType*>(p), pl / sizeof(CharType));

// The return value of processNextChunk() is ignored here; the outcome is read via getResult().
evaluator.processNextChunk(reinterpret_cast<const CharType*>(s), sl / sizeof(CharType));

return evaluator.getResult();
}

private:
// Evaluator constructed from the pattern; receives the chunks passed to process().
Firebird::StartsEvaluator<CharType> evaluator;
// Upper bound on the total number of bytes process() accepts; computed in the constructor.
SLONG byteLengthLimit;
// Number of bytes accepted by process() so far; set back to zero by reset().
SLONG processedByteLength = 0;
};

template <typename CharType, typename StrConverter = CanonicalConverter<UpcaseConverter<> > >
Expand Down

0 comments on commit 05f0cb2

Please sign in to comment.