From 0dcc889e8d9b029026d3aa6a6d6b1f1d1b890c6a Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 15 May 2019 22:43:47 +0200 Subject: [PATCH] Fix apiexample_test with locale de_DE.UTF-8 The unittest failed with LANG=de_DE.UTF-8: $ unittest/apiexample_test Running main() from ../../../../unittest/../googletest/googletest/src/gtest_main.cc [==========] Running 4 tests from 2 test suites. [----------] Global test environment set-up. [----------] 1 test from EuroText [ RUN ] EuroText.FastLatinOCR contains_unichar_id(unichar_id):Error:Assert failed:in file ../../../../../src/ccutil/unicharset.h, line 874 Signed-off-by: Stefan Weil --- src/ccutil/unicharset.cpp | 95 +++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 34 deletions(-) diff --git a/src/ccutil/unicharset.cpp b/src/ccutil/unicharset.cpp index 6c69ecd20f..8585e64f4a 100644 --- a/src/ccutil/unicharset.cpp +++ b/src/ccutil/unicharset.cpp @@ -22,6 +22,9 @@ #include #include #include +#include // for std::setw +#include // for std::locale::classic +#include // for std::istringstream #include "params.h" #include "serialis.h" @@ -706,6 +709,7 @@ bool UNICHARSET::save_to_string(STRING *str) const { this->get_script_from_script_id(this->get_script(id)), this->get_other_case(id)); } else { + // FIXME snprintf(buffer, kFileBufSize, "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n", this->id_to_unichar(id), properties, @@ -815,41 +819,64 @@ bool UNICHARSET::load_via_fgets( float advance = 0.0f; float advance_sd = 0.0f; // TODO(eger): check that this default it ok - // after enabling BiDi iterator for Arabic+Cube. + // after enabling BiDi iterator for Arabic. int direction = UNICHARSET::U_LEFT_TO_RIGHT; - UNICHAR_ID other_case = id; - UNICHAR_ID mirror = id; + UNICHAR_ID other_case = unicharset_size; + UNICHAR_ID mirror = unicharset_size; + if (fgets_cb->Run(buffer, sizeof (buffer)) == nullptr) { + return false; + } char normed[64]; - int v = -1; - if (fgets_cb->Run(buffer, sizeof (buffer)) == nullptr || - ((v = sscanf(buffer, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - &width, &width_sd, &bearing, &bearing_sd, - &advance, &advance_sd, script, &other_case, - &direction, &mirror, normed)) != 17 && - (v = sscanf(buffer, - "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - &width, &width_sd, &bearing, &bearing_sd, - &advance, &advance_sd, script, &other_case, - &direction, &mirror)) != 16 && - (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d", - unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - script, &other_case, &direction, &mirror)) != 10 && - (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d", unichar, &properties, - &min_bottom, &max_bottom, &min_top, &max_top, - script, &other_case)) != 8 && - (v = sscanf(buffer, "%s %x %63s %d", unichar, &properties, - script, &other_case)) != 4 && - (v = sscanf(buffer, "%s %x %63s", - unichar, &properties, script)) != 3 && - (v = sscanf(buffer, "%s %x", unichar, &properties)) != 2)) { + normed[0] = '\0'; + std::istringstream stream(buffer); + stream.imbue(std::locale::classic()); + // 标 1 0,255,0,255,0,0,0,0,0,0 Han 68 0 68 标 # 标 [6807 ]x + //stream.flags(std::ios::hex); + stream >> std::setw(255) >> unichar >> std::hex >> properties >> std::dec; + //stream.flags(std::ios::dec); + if (stream.fail()) { + fprintf(stderr, "%s:%u failed\n", __FILE__, __LINE__); return false; } + auto position = stream.tellg(); + stream.seekg(position); + char c1, c2, c3, c4, c5, c6, c7, c8, c9; + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> + width >> c5 >>width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> + advance >> c9 >> advance_sd >> std::setw(63) >> script >> + other_case >> direction >> mirror >> std::setw(63) >> normed; + if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || + c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') { + stream.clear(); + stream.seekg(position); + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> c4 >> + width >> c5 >>width_sd >> c6 >> bearing >> c7 >> bearing_sd >> c8 >> + advance >> c9 >> advance_sd >> std::setw(63) >> script >> + other_case >> direction >> mirror; + if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',' || c4 != ',' || + c5 != ',' || c6 != ',' || c7 != ',' || c8 != ',' || c9 != ',') { + stream.clear(); + stream.seekg(position); + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> + std::setw(63) >> script >> other_case >> direction >> mirror; + if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') { + stream.clear(); + stream.seekg(position); + stream >> min_bottom >> c1 >> max_bottom >> c2 >> min_top >> c3 >> max_top >> + std::setw(63) >> script >> other_case; + if (stream.fail() || c1 != ',' || c2 != ',' || c3 != ',') { + stream.clear(); + stream.seekg(position); + stream >> std::setw(63) >> script >> other_case; + if (stream.fail()) { + stream.clear(); + stream.seekg(position); + stream >> std::setw(63) >> script; + } + } + } + } + } // Skip fragments if needed. CHAR_FRAGMENT *frag = nullptr; @@ -880,9 +907,9 @@ bool UNICHARSET::load_via_fgets( this->set_advance_stats(id, advance, advance_sd); this->set_direction(id, static_cast(direction)); this->set_other_case( - id, (v > 3 && other_case < unicharset_size) ? other_case : id); - this->set_mirror(id, (v > 8 && mirror < unicharset_size) ? mirror : id); - this->set_normed(id, (v>16) ? normed : unichar); + id, (other_case < unicharset_size) ? other_case : id); + this->set_mirror(id, (mirror < unicharset_size) ? mirror : id); + this->set_normed(id, normed[0] != '\0' ? normed : unichar); } post_load_setup(); return true;