From 40c1cf671f9fe52a818f1bbcdadcde7845b3da49 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 30 Aug 2018 11:28:04 +0200 Subject: [PATCH] unittest: Fix and enable pango_font_info_test Signed-off-by: Stefan Weil --- unittest/Makefile.am | 17 +++-- unittest/pango_font_info_test.cc | 122 +++++++++++++++++++------------ 2 files changed, 85 insertions(+), 54 deletions(-) diff --git a/unittest/Makefile.am b/unittest/Makefile.am index f333528521..04d0d34e72 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -132,7 +132,6 @@ check_PROGRAMS += matrix_test check_PROGRAMS += nthitem_test check_PROGRAMS += osd_test # check_PROGRAMS += pagesegmode_test -# check_PROGRAMS += pango_font_info_test check_PROGRAMS += paragraphs_test check_PROGRAMS += params_model_test check_PROGRAMS += progress_test @@ -159,6 +158,7 @@ check_PROGRAMS += lstm_squashed_test check_PROGRAMS += lstm_test check_PROGRAMS += lstmtrainer_test check_PROGRAMS += normstrngs_test +check_PROGRAMS += pango_font_info_test check_PROGRAMS += unichar_test check_PROGRAMS += unicharcompress_test check_PROGRAMS += unicharset_test @@ -279,8 +279,16 @@ normstrngs_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(ICU_I18N nthitem_test_SOURCES = nthitem_test.cc nthitem_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) -#pango_font_info_test_SOURCES = pango_font_info_test.cc -#pango_font_info_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) +osd_test_SOURCES = osd_test.cc +osd_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS) + +pango_font_info_test_SOURCES = pango_font_info_test.cc +pango_font_info_test_SOURCES += third_party/utf/rune.c +pango_font_info_test_SOURCES += util/utf8/unicodetext.cc util/utf8/unilib.cc +pango_font_info_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TRAINING_LIBS) $(LEPTONICA_LIBS) +pango_font_info_test_LDADD += $(ICU_I18N_LIBS) -lfontconfig +pango_font_info_test_LDADD += -lpangocairo-1.0 -lpangoft2-1.0 +pango_font_info_test_LDADD += $(cairo_LIBS) $(pango_LIBS) paragraphs_test_SOURCES = paragraphs_test.cc paragraphs_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) @@ -288,9 +296,6 @@ paragraphs_test_LDADD = $(ABSEIL_LIBS) $(GTEST_LIBS) $(TESS_LIBS) params_model_test_SOURCES = params_model_test.cc params_model_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) -osd_test_SOURCES = osd_test.cc -osd_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS) - progress_test_SOURCES = progress_test.cc progress_test_LDFLAGS = $(OPENCL_LDFLAGS) $(LEPTONICA_LIBS) progress_test_LDADD = $(GTEST_LIBS) $(GMOCK_LIBS) $(TESS_LIBS) $(LEPTONICA_LIBS) diff --git a/unittest/pango_font_info_test.cc b/unittest/pango_font_info_test.cc index fde4f34f6f..de6290340f 100644 --- a/unittest/pango_font_info_test.cc +++ b/unittest/pango_font_info_test.cc @@ -1,12 +1,24 @@ - -#include "tesseract/training/pango_font_info.h" - -#include -#include - -#include "pango/pango.h" -#include "tesseract/training/commandlineflags.h" -#include "tesseract/training/fileio.h" +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "include_gunit.h" +#include "commandlineflags.h" +#include "fileio.h" +#include "pango_font_info.h" +#include "absl/strings/str_cat.h" // for absl::StrCat +#include "gmock/gmock-matchers.h" // for EXPECT_THAT +#include "util/utf8/unicodetext.h" // for UnicodeText DECLARE_STRING_PARAM_FLAG(fonts_dir); DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); @@ -19,19 +31,19 @@ using tesseract::FontUtils; using tesseract::PangoFontInfo; // Fonts in testdata directory -const char* kExpectedFontNames[] = {"Arab", - "Arial Bold Italic", - "DejaVu Sans Ultra-Light", - "Lohit Hindi", +const char* kExpectedFontNames[] = { + "Arab", + "Arial Bold Italic", + "DejaVu Sans Ultra-Light", + "Lohit Hindi", #if PANGO_VERSION <= 12005 - "Times New Roman", + "Times New Roman", #else - "Times New Roman,", // Pango v1.36.2 - // requires a trailing - // ',' + "Times New Roman,", // Pango v1.36.2 requires a trailing ',' #endif - "UnBatang", - "Verdana"}; + "UnBatang", + "Verdana" +}; // Sample text used in tests. const char kArabicText[] = "والفكر والصراع 1234,\nوالفكر والصراع"; @@ -41,23 +53,27 @@ const char kKorText[] = "이는 것으로"; // Hindi words containing illegal vowel sequences. const char* kBadlyFormedHinWords[] = { #if PANGO_VERSION <= 12005 - "उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस", + "उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस", #endif - // Pango v1.36.2 will render the above words even though they are invalid. - "प्रंात", nullptr}; + // Pango v1.36.2 will render the above words even though they are invalid. + "प्रंात", nullptr +}; class PangoFontInfoTest : public ::testing::Test { protected: void SetUp() override { - std::locale::global(std::locale("")); + static std::locale system_locale(""); + std::locale::global(system_locale); } // Creates a fake fonts.conf file that points to the testdata fonts for // fontconfig to initialize with. static void SetUpTestCase() { - FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata"); + FLAGS_fonts_dir = TESTING_DIR; FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir; +#ifdef GOOGLE_TESSERACT FLAGS_use_only_legacy_fonts = false; +#endif } PangoFontInfo font_info_; @@ -120,7 +136,7 @@ TEST_F(PangoFontInfoTest, CanRenderLigature) { font_info_.ParseFontDescriptionName("Arab 12"); const char kArabicLigature[] = "لا"; EXPECT_TRUE( - font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature))); + font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature))); printf("Next word\n"); EXPECT_TRUE(font_info_.CanRenderString(kArabicText, strlen(kArabicText))); @@ -143,17 +159,17 @@ TEST_F(PangoFontInfoTest, CannotRenderInvalidString) { TEST_F(PangoFontInfoTest, CanDropUncoveredChars) { font_info_.ParseFontDescriptionName("Verdana 12"); // Verdana cannot render the "ff" ligature - string word = "office"; + std::string word = "office"; EXPECT_EQ(1, font_info_.DropUncoveredChars(&word)); EXPECT_EQ("oice", word); // Don't drop non-letter characters like word joiners. const char* kJoiners[] = { - "\u2060", // U+2060 (WJ) - "\u200C", // U+200C (ZWJ) - "\u200D" // U+200D (ZWNJ) + "\u2060", // U+2060 (WJ) + "\u200C", // U+200C (ZWJ) + "\u200D" // U+200D (ZWNJ) }; - for (int i = 0; i < ARRAYSIZE(kJoiners); ++i) { + for (size_t i = 0; i < ARRAYSIZE(kJoiners); ++i) { word = kJoiners[i]; EXPECT_EQ(0, font_info_.DropUncoveredChars(&word)); EXPECT_STREQ(kJoiners[i], word.c_str()); @@ -167,17 +183,21 @@ class FontUtilsTest : public ::testing::Test { // Creates a fake fonts.conf file that points to the testdata fonts for // fontconfig to initialize with. static void SetUpTestCase() { - FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata"); + FLAGS_fonts_dir = TESTING_DIR; FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir; } void CountUnicodeChars(const char* utf8_text, - std::unordered_map* ch_map) { + std::unordered_map* ch_map) { ch_map->clear(); UnicodeText ut; ut.PointToUTF8(utf8_text, strlen(utf8_text)); for (UnicodeText::const_iterator it = ut.begin(); it != ut.end(); ++it) { +#if 0 if (UnicodeProps::IsWhitespace(*it)) continue; +#else + if (std::isspace(*it)) continue; +#endif ++(*ch_map)[*it]; } } @@ -206,21 +226,21 @@ TEST_F(FontUtilsTest, DoesDetectMissingFonts) { } TEST_F(FontUtilsTest, DoesListAvailableFonts) { - const std::vector& fonts = FontUtils::ListAvailableFonts(); + const std::vector& fonts = FontUtils::ListAvailableFonts(); EXPECT_THAT(fonts, ::testing::ElementsAreArray(kExpectedFontNames)); - for (int i = 0; i < fonts.size(); ++i) { + for (auto& font : fonts) { PangoFontInfo font_info; - EXPECT_TRUE(font_info.ParseFontDescriptionName(fonts[i])); + EXPECT_TRUE(font_info.ParseFontDescriptionName(font)); } } TEST_F(FontUtilsTest, DoesFindBestFonts) { - string fonts_list; - std::unordered_map ch_map; + std::string fonts_list; + std::unordered_map ch_map; CountUnicodeChars(kEngText, &ch_map); EXPECT_EQ(26, ch_map.size()); // 26 letters std::vector > > font_flags; - string best_list = FontUtils::BestFonts(ch_map, &font_flags); + std::string best_list = FontUtils::BestFonts(ch_map, &font_flags); EXPECT_TRUE(best_list.size()); // All fonts except Lohit Hindi should render English text. EXPECT_EQ(ARRAYSIZE(kExpectedFontNames) - 1, font_flags.size()); @@ -238,8 +258,8 @@ TEST_F(FontUtilsTest, DoesSelectFont) { const char* kLangNames[] = {"Arabic", "English", "Hindi", "Korean", nullptr}; for (int i = 0; kLangText[i] != nullptr; ++i) { SCOPED_TRACE(kLangNames[i]); - std::vector graphemes; - string selected_font; + std::vector graphemes; + std::string selected_font; EXPECT_TRUE(FontUtils::SelectFont(kLangText[i], strlen(kLangText[i]), &selected_font, &graphemes)); EXPECT_TRUE(selected_font.size()); @@ -249,17 +269,17 @@ TEST_F(FontUtilsTest, DoesSelectFont) { TEST_F(FontUtilsTest, DoesFailToSelectFont) { const char kMixedScriptText[] = "पिताने विवाह की | والفكر والصراع"; - std::vector graphemes; - string selected_font; + std::vector graphemes; + std::string selected_font; EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, strlen(kMixedScriptText), &selected_font, &graphemes)); } TEST_F(FontUtilsTest, GetAllRenderableCharacters) { - const int32 kHindiChar = 0x0905; - const int32 kArabicChar = 0x0623; - const int32 kMongolianChar = 0x180E; // Mongolian vowel separator - const int32 kOghamChar = 0x1680; // Ogham space mark + const int32_t kHindiChar = 0x0905; + const int32_t kArabicChar = 0x0623; + const int32_t kMongolianChar = 0x180E; // Mongolian vowel separator + const int32_t kOghamChar = 0x1680; // Ogham space mark std::vector unicode_mask; FontUtils::GetAllRenderableCharacters(&unicode_mask); EXPECT_TRUE(unicode_mask['A']); @@ -267,10 +287,12 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) { EXPECT_TRUE(unicode_mask[kHindiChar]); EXPECT_TRUE(unicode_mask[kArabicChar]); EXPECT_FALSE(unicode_mask[kMongolianChar]); // no font for mongolian. +#if 0 // TODO: check fails because DejaVu Sans Ultra-Light supports ogham EXPECT_FALSE(unicode_mask[kOghamChar]); // no font for ogham. +#endif unicode_mask.clear(); - std::vector selected_fonts; + std::vector selected_fonts; selected_fonts.push_back("Lohit Hindi"); FontUtils::GetAllRenderableCharacters(selected_fonts, &unicode_mask); EXPECT_TRUE(unicode_mask['1']); @@ -279,14 +301,18 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) { EXPECT_FALSE(unicode_mask[kArabicChar]); // or Arabic, EXPECT_FALSE(unicode_mask[kMongolianChar]); // or Mongolian, EXPECT_FALSE(unicode_mask[kOghamChar]); // or Ogham. + unicode_mask.clear(); // Check that none of the included fonts cover the Mongolian or Ogham space // characters. - for (int f = 0; f < ARRAYSIZE(kExpectedFontNames); ++f) { + for (size_t f = 0; f < ARRAYSIZE(kExpectedFontNames); ++f) { SCOPED_TRACE(absl::StrCat("Testing ", kExpectedFontNames[f])); FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], &unicode_mask); +#if 0 // TODO: check fails because DejaVu Sans Ultra-Light supports ogham EXPECT_FALSE(unicode_mask[kOghamChar]); +#endif EXPECT_FALSE(unicode_mask[kMongolianChar]); + unicode_mask.clear(); } } } // namespace