Skip to content

Commit

Permalink
Introducing a new SQLite extension function: GET_PHONEBOOK_INDEX
Browse files Browse the repository at this point in the history
This function will produce a normalized upper case first letter
from a given string.

Bug: 2407129
Change-Id: Idfafca04342d43ef43cfdff0e431e0a6a8cf5c68
  • Loading branch information
dmitriplotnikov committed Mar 3, 2010
1 parent aae12b8 commit 3a74962
Show file tree
Hide file tree
Showing 4 changed files with 264 additions and 12 deletions.
1 change: 1 addition & 0 deletions android/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ LOCAL_SRC_FILES:= \
PhoneNumberUtils.cpp \
PhoneticStringUtils.cpp \
OldPhoneNumberUtils.cpp \
PhonebookIndex.cpp \
sqlite3_android.cpp

LOCAL_C_INCLUDES := \
Expand Down
162 changes: 162 additions & 0 deletions android/PhonebookIndex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Copyright 2010, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <ctype.h>
#include <string.h>

#include <unicode/ucol.h>
#include <unicode/uiter.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>

#include "PhonebookIndex.h"
#include "PhoneticStringUtils.h"

// Number of UChar elements in the scratch buffer that GetPhonebookIndex uses
// to hold the normalized form of the first character.
#define SMALL_BUFFER_SIZE 10

namespace android {

// Explicit character -> phone book index overrides.
//
// The array is a flat list of (key, value) pairs: every even position holds
// the character to look up and the following odd position holds the index
// character it maps to. Lookups are performed on the upper-cased character.
//
// IMPORTANT! Keep the keys below SORTED in ascending order. map_character()
// does a binary search on the even positions of this array.
static UChar DEFAULT_CHAR_MAP[] = {
0x00C6, 'A', // AE
0x00DF, 'S', // Eszett (sharp s)
0x1100, 0x3131, // HANGUL LETTER KIYEOK
0x1101, 0x3132, // HANGUL LETTER SSANGKIYEOK
0x1102, 0x3134, // HANGUL LETTER NIEUN
0x1103, 0x3137, // HANGUL LETTER TIKEUT
0x1104, 0x3138, // HANGUL LETTER SSANGTIKEUT
0x1105, 0x3139, // HANGUL LETTER RIEUL
0x1106, 0x3141, // HANGUL LETTER MIEUM
0x1107, 0x3142, // HANGUL LETTER PIEUP
0x1108, 0x3143, // HANGUL LETTER SSANGPIEUP
0x1109, 0x3145, // HANGUL LETTER SIOS
0x110A, 0x3146, // HANGUL LETTER SSANGSIOS
0x110B, 0x3147, // HANGUL LETTER IEUNG
0x110C, 0x3148, // HANGUL LETTER CIEUC
0x110D, 0x3149, // HANGUL LETTER SSANGCIEUC
0x110E, 0x314A, // HANGUL LETTER CHIEUCH
0x110F, 0x314B, // HANGUL LETTER KHIEUKH
0x1110, 0x314C, // HANGUL LETTER THIEUTH
0x1111, 0x314D, // HANGUL LETTER PHIEUPH
0x1112, 0x314E, // HANGUL LETTER HIEUH
0x111A, 0x3140, // HANGUL LETTER RIEUL-HIEUH
0x1121, 0x3144, // HANGUL LETTER PIEUP-SIOS
0x1161, 0x314F, // HANGUL LETTER A
0x1162, 0x3150, // HANGUL LETTER AE
0x1163, 0x3151, // HANGUL LETTER YA
0x1164, 0x3152, // HANGUL LETTER YAE
0x1165, 0x3153, // HANGUL LETTER EO
0x1166, 0x3154, // HANGUL LETTER E
0x1167, 0x3155, // HANGUL LETTER YEO
0x1168, 0x3156, // HANGUL LETTER YE
0x1169, 0x3157, // HANGUL LETTER O
0x116A, 0x3158, // HANGUL LETTER WA
0x116B, 0x3159, // HANGUL LETTER WAE
0x116C, 0x315A, // HANGUL LETTER OE
0x116D, 0x315B, // HANGUL LETTER YO
0x116E, 0x315C, // HANGUL LETTER U
0x116F, 0x315D, // HANGUL LETTER WEO
0x1170, 0x315E, // HANGUL LETTER WE
0x1171, 0x315F, // HANGUL LETTER WI
0x1172, 0x3160, // HANGUL LETTER YU
0x1173, 0x3161, // HANGUL LETTER EU
0x1174, 0x3162, // HANGUL LETTER YI
0x1175, 0x3163, // HANGUL LETTER I
0x11AA, 0x3133, // HANGUL LETTER KIYEOK-SIOS
0x11AC, 0x3135, // HANGUL LETTER NIEUN-CIEUC
0x11AD, 0x3136, // HANGUL LETTER NIEUN-HIEUH
0x11B0, 0x313A, // HANGUL LETTER RIEUL-KIYEOK
0x11B1, 0x313B, // HANGUL LETTER RIEUL-MIEUM
0x11B3, 0x313D, // HANGUL LETTER RIEUL-SIOS
0x11B4, 0x313E, // HANGUL LETTER RIEUL-THIEUTH
0x11B5, 0x313F, // HANGUL LETTER RIEUL-PHIEUPH
};

/**
 * Looks up a character in a flat table of (key, value) pairs and returns the
 * value paired with it, or 0 when the character is not a key in the table.
 *
 * Keys occupy the even positions of char_map and must be sorted in ascending
 * order; each value sits in the odd position right after its key. length is
 * the total number of array elements (keys plus values).
 */
static UChar map_character(UChar c, UChar * char_map, int32_t length) {
    int lo = 0;
    int hi = length;

    // Half-open search window [lo, hi); both bounds always stay on even slots.
    while (lo < hi) {
        // Midpoint rounded down to an even slot, because only even slots hold keys.
        const int mid = ((lo + hi) / 2) & ~0x1;
        const UChar key = char_map[mid];

        if (key < c) {
            lo = mid + 2;
        } else if (key > c) {
            hi = mid;
        } else {
            return char_map[mid + 1];
        }
    }

    return 0;
}

/**
 * Returns true if the character falls inside one of the CJK unicode blocks
 * listed in the range table below, false otherwise.
 */
static bool is_CJK(UChar c) {
    // Inclusive [first, last] code unit ranges of the blocks treated as CJK.
    static const struct {
        UChar first;
        UChar last;
    } kBlocks[] = {
        { 0x4e00, 0x9fff },   // CJK_UNIFIED_IDEOGRAPHS
        { 0x3400, 0x4dbf },   // CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
        { 0x3000, 0x303f },   // CJK_SYMBOLS_AND_PUNCTUATION
        { 0x2e80, 0x2eff },   // CJK_RADICALS_SUPPLEMENT
        { 0x3300, 0x33ff },   // CJK_COMPATIBILITY
        { 0xfe30, 0xfe4f },   // CJK_COMPATIBILITY_FORMS
        { 0xf900, 0xfaff },   // CJK_COMPATIBILITY_IDEOGRAPHS
    };

    for (size_t i = 0; i < sizeof(kBlocks) / sizeof(kBlocks[0]); i++) {
        if (kBlocks[i].first <= c && c <= kBlocks[i].last) {
            return true;
        }
    }
    return false;
}

/**
 * Produces the phone book index character for the text the iterator points at.
 *
 * The first character read from iter is NFD-normalized (which separates
 * accents from their base letter), upper-cased, and then passed through the
 * explicit DEFAULT_CHAR_MAP overrides and the kana normalization.
 *
 * @param iter   iterator positioned at the start of the string to index
 * @param locale locale identifier; only a leading "ja" changes the result
 *               (CJK characters then map to a single Kanji heading). A NULL
 *               locale is treated as a non-Japanese locale.
 * @return the index character, or 0 if none can be produced: normalization
 *         failed, the input was empty, the first character is not a letter,
 *         or it is a CJK character in a non-Japanese locale.
 */
UChar GetPhonebookIndex(UCharIterator * iter, const char * locale) {
    UChar dest[SMALL_BUFFER_SIZE];

    // Normalize the first character to remove accents using the NFD normalization.
    // The capacity passed to ICU is a number of UChars, NOT a number of bytes;
    // passing a byte count here would let ICU write past the end of dest.
    UErrorCode errorCode = U_ZERO_ERROR;
    int32_t len = unorm_next(iter, dest, (int32_t)(sizeof(dest) / sizeof(dest[0])), UNORM_NFD,
            0 /* options */, TRUE /* normalize */, NULL, &errorCode);
    if (U_FAILURE(errorCode) || len <= 0) {
        return 0;
    }

    UChar c = dest[0];

    // We are only interested in letters
    if (!u_isalpha(c)) {
        return 0;
    }

    c = u_toupper(c);

    // Check for explicitly mapped characters
    UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
    if (c_mapped != 0) {
        return c_mapped;
    }

    // Convert Kanas to Hiragana
    // NOTE(review): dest[1] is already valid when len == 2, so "len > 2" looks
    // like an off-by-one for "len > 1". Left unchanged because the effect on
    // GetNormalizedCodePoint cannot be verified from this file -- confirm.
    UChar next = len > 2 ? dest[1] : 0;
    c = android::GetNormalizedCodePoint(c, next, NULL);

    if (is_CJK(c)) {
        // Guard against a NULL locale before comparing its prefix
        if (locale != NULL && strncmp(locale, "ja", 2) == 0) {
            return 0x8A18; // Kanji character used as a heading in letters, notices and other documents
        } else {
            return 0;
        }
    }

    return c;
}

} // namespace android
36 changes: 36 additions & 0 deletions android/PhonebookIndex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
**
** Copyright 2010, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#ifndef _ANDROID_PHONEBOOK_INDEX_H
#define _ANDROID_PHONEBOOK_INDEX_H

#include <unicode/uiter.h>
#include <unicode/utypes.h>

namespace android {

/**
* A character converter that takes the first UNICODE character supplied by
* the iterator and produces the phonebook index for it in the specified
* locale. For example, "a" becomes "A" and so does A with accents.
* Conversion rules differ from locale to locale, which is why this function
* takes locale as an argument.
*
* Returns 0 when no index can be produced, for example when the first
* character is not a letter.
*/
UChar GetPhonebookIndex(UCharIterator * iter, const char * locale);

} // namespace android

#endif
77 changes: 65 additions & 12 deletions android/sqlite3_android.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,18 @@
#include <unistd.h>

#include <unicode/ucol.h>
#include <unicode/uiter.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <cutils/log.h>

#include "sqlite3_android.h"
#include "PhoneNumberUtils.h"
#include "PhonebookIndex.h"
#include "PhoneticStringUtils.h"

#define ENABLE_ANDROID_LOG 0
#define SMALL_BUFFER_SIZE 10

static int collate16(void *p, int n1, const void *v1, int n2, const void *v2)
{
Expand Down Expand Up @@ -70,6 +74,45 @@ static int collate8(void *p, int n1, const void *v1, int n2, const void *v2)
}
}

/**
 * SQL function GET_PHONEBOOK_INDEX(string, locale).
 *
 * Takes the first UNICODE letter of the supplied UTF-8 string, converts it to
 * its phone book index for the given locale and returns that index as UTF-8
 * text. The result is SQL NULL when the argument count is not 2, when either
 * argument is NULL, when the string is empty, when no index exists for the
 * first character, or when the index cannot be encoded.
 */
static void get_phonebook_index(
        sqlite3_context * context, int argc, sqlite3_value ** argv)
{
    uint8_t utf8[SMALL_BUFFER_SIZE];
    uint32_t utf8Len = 0;
    bool haveResult = false;

    if (argc == 2) {
        const char * text = (const char *)sqlite3_value_text(argv[0]);
        const char * locale = (const char *)sqlite3_value_text(argv[1]);

        if (text != NULL && text[0] != 0 && locale != NULL) {
            // Walk the NUL-terminated UTF-8 input through an ICU iterator
            UCharIterator iter;
            uiter_setUTF8(&iter, text, -1);

            const UChar index = android::GetPhonebookIndex(&iter, locale);
            if (index != 0) {
                // Encode the single index character back to UTF-8
                UBool encodeFailed = FALSE;
                U8_APPEND(utf8, utf8Len, sizeof(utf8), index, encodeFailed);
                haveResult = !encodeFailed && utf8Len != 0;
            }
        }
    }

    if (haveResult) {
        // SQLITE_TRANSIENT: the buffer lives on this stack frame, so SQLite must copy it
        sqlite3_result_text(context, (const char*)utf8, utf8Len, SQLITE_TRANSIENT);
    } else {
        sqlite3_result_null(context);
    }
}

static void get_phonetically_sortable_string(
sqlite3_context * context, int argc, sqlite3_value ** argv)
{
Expand Down Expand Up @@ -183,10 +226,10 @@ static void delete_file(sqlite3_context * context, int argc, sqlite3_value ** ar
sqlite3_result_null(context);
return;
}

if (strncmp("/sdcard/", path, 8) != 0) {
sqlite3_result_null(context);
return;
return;
}
if (strstr(path, "/../") != NULL) {
sqlite3_result_null(context);
Expand Down Expand Up @@ -335,7 +378,7 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
if (origData == NULL) {
sqlite3_result_null(context);
return;
}
}

// Get the raw bytes for the delimiter
const UChar * delim = (const UChar *)sqlite3_value_text16(argv[3]);
Expand All @@ -344,15 +387,15 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
sqlite3_result_null(context);
return;
}

UChar * token = NULL;
UChar *state;
int numTokens = 0;
int numTokens = 0;

do {
if (numTokens == 0) {
token = origData;
}
}

// Reset the program so we can use it to perform the insert
sqlite3_reset(statement);
Expand All @@ -367,9 +410,9 @@ static void tokenize(sqlite3_context * context, int argc, sqlite3_value ** argv)
uint32_t keysize = result-1;
uint32_t base16Size = keysize*2;
char *base16buf = (char*)malloc(base16Size);
base16Encode(base16buf, keybuf, keysize);
base16Encode(base16buf, keybuf, keysize);
err = sqlite3_bind_text(statement, 1, base16buf, base16Size, SQLITE_STATIC);

if (err != SQLITE_OK) {
LOGE(" sqlite3_bind_text16 error %d", err);
free(base16buf);
Expand Down Expand Up @@ -418,7 +461,7 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL
if (U_FAILURE(status)) {
return -1;
}

ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
if (U_FAILURE(status)) {
return -1;
Expand All @@ -438,7 +481,7 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL
if (err != SQLITE_OK) {
return err;
}

// Register the _TOKENIZE function
err = sqlite3_create_function(handle, "_TOKENIZE", 4, SQLITE_UTF16, collator, tokenize, NULL, NULL);
if (err != SQLITE_OK) {
Expand All @@ -465,7 +508,7 @@ extern "C" int register_android_functions(sqlite3 * handle, int utf16Storage)
UCollator * collator = ucol_open(NULL, &status);
if (U_FAILURE(status)) {
return -1;
}
}

if (utf16Storage) {
// Note that text should be stored as UTF-16
Expand Down Expand Up @@ -536,5 +579,15 @@ extern "C" int register_android_functions(sqlite3 * handle, int utf16Storage)
return err;
}

// Register the GET_PHONEBOOK_INDEX function
err = sqlite3_create_function(handle,
"GET_PHONEBOOK_INDEX",
2, SQLITE_UTF8, NULL,
get_phonebook_index,
NULL, NULL);
if (err != SQLITE_OK) {
return err;
}

return SQLITE_OK;
}

0 comments on commit 3a74962

Please sign in to comment.