From 20ed60b31fb306ef364e60810aaad72cbc6c1bf0 Mon Sep 17 00:00:00 2001 From: Shree Devi Kumar Date: Fri, 18 Jan 2019 16:41:29 +0000 Subject: [PATCH] Fix unicharset_test --- unittest/Makefile.am | 10 ++++++++++ unittest/unichar_test.cc | 18 +++++++++++++++--- unittest/unicharset_test.cc | 31 +++++++++++++++++++++++-------- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 359bbdb11f..192c65597e 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -8,10 +8,12 @@ TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata # Absolute path of directory 'testing' with test images and ground truth texts # (using submodule test). TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing +TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\"" AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\"" AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\"" +AM_CPPFLAGS += -DTESTDATA_DIR="\"$(TESTDATA_DIR)\"" AM_CPPFLAGS += -DPANGO_ENABLE_ENGINE AM_CPPFLAGS += -I$(top_builddir)/src/api AM_CPPFLAGS += -I$(top_srcdir)/src/api @@ -123,6 +125,8 @@ check_PROGRAMS = \ if ENABLE_TRAINING check_PROGRAMS += commandlineflags_test +check_PROGRAMS += unichar_test +check_PROGRAMS += unicharset_test check_PROGRAMS += validator_test endif @@ -235,6 +239,12 @@ tabvector_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) tfile_test_SOURCES = tfile_test.cc tfile_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS) +unichar_test_SOURCES = unichar_test.cc +unichar_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS) + +unicharset_test_SOURCES = unicharset_test.cc +unicharset_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS) + validator_test_SOURCES = validator_test.cc validator_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS) diff --git a/unittest/unichar_test.cc b/unittest/unichar_test.cc index 3829c30504..c78afd2c93 100644 --- 
a/unittest/unichar_test.cc +++ b/unittest/unichar_test.cc @@ -1,4 +1,16 @@ -#include "tesseract/ccutil/unichar.h" +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "unichar.h" +#include "include_gunit.h" using tesseract::UNICHAR; @@ -13,7 +25,7 @@ TEST(UnicharTest, Conversion) { // Check for round-trip conversion. std::vector utf32 = UNICHAR::UTF8ToUTF32(kUTF8Src); EXPECT_THAT(utf32, testing::ElementsAreArray(kUTF32Src)); - string utf8 = UNICHAR::UTF32ToUTF8(utf32); + std::string utf8 = UNICHAR::UTF32ToUTF8(utf32); EXPECT_STREQ(kUTF8Src, utf8.c_str()); } @@ -25,7 +37,7 @@ TEST(UnicharTest, InvalidText) { std::vector utf32 = UNICHAR::UTF8ToUTF32(kInvalidUTF8); EXPECT_TRUE(utf32.empty()); // Invalid utf32 produces an empty string. - string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32); + std::string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32); EXPECT_TRUE(utf8.empty()); } diff --git a/unittest/unicharset_test.cc b/unittest/unicharset_test.cc index 8a5ad0b925..750904da8c 100644 --- a/unittest/unicharset_test.cc +++ b/unittest/unicharset_test.cc @@ -1,4 +1,19 @@ -#include "tesseract/ccutil/unicharset.h" +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <string> +#include "log.h" // for LOG +#include "unicharset.h" +#include "gmock/gmock.h" // for testing::ElementsAreArray +#include "include_gunit.h" using testing::ElementsAreArray; @@ -32,7 +47,7 @@ TEST(UnicharsetTest, Basics) { std::vector<int> v(&labels[0], &labels[0] + labels.size()); EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6})); // With the fi ligature encoding fails without a pre-cleanup. - string lig_str = "af\ufb01ne"; + std::string lig_str = "af\ufb01ne"; EXPECT_FALSE( u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr)); lig_str = u.CleanupString(lig_str.c_str()); @@ -62,7 +77,7 @@ TEST(UnicharsetTest, Multibyte) { EXPECT_EQ(u.size(), 9); EXPECT_EQ(u.unichar_to_id("\u0627"), 3); EXPECT_EQ(u.unichar_to_id("\u062c"), 4); - // The first two bytes of this string is \u0627, which matches id 3; + // The first two bytes of this string are \u0627, which matches id 3; EXPECT_EQ(u.unichar_to_id("\u0627\u062c", 2), 3); EXPECT_EQ(u.unichar_to_id("\u062f"), 5); // Individual f and i are not present, but they are there as a pair. @@ -79,13 +94,13 @@ TEST(UnicharsetTest, Multibyte) { // With the fi ligature the fi is picked out. GenericVector<char> lengths; int encoded_length; - string src_str = "\u0627\u062c\ufb01\u0635\u062b"; + std::string src_str = "\u0627\u062c\ufb01\u0635\u062b"; // src_str has to be pre-cleaned for lengths to be correct. 
- string cleaned = u.CleanupString(src_str.c_str()); + std::string cleaned = u.CleanupString(src_str.c_str()); EXPECT_TRUE(u.encode_string(cleaned.c_str(), true, &labels, &lengths, &encoded_length)); EXPECT_EQ(encoded_length, cleaned.size()); - string len_str(&lengths[0], lengths.size()); + std::string len_str(&lengths[0], lengths.size()); EXPECT_STREQ(len_str.c_str(), "\002\002\002\002\002"); v = std::vector<int>(&labels[0], &labels[0] + labels.size()); EXPECT_THAT(v, ElementsAreArray({3, 4, 6, 8, 7})); @@ -128,8 +143,8 @@ TEST(UnicharsetTest, MultibyteBigrams) { TEST(UnicharsetTest, OldStyle) { // This test verifies an old unicharset that contains fi/fl ligatures loads // and keeps all the entries. - string filename = - file::JoinPath(FLAGS_test_srcdir, "testdata", "eng.unicharset"); + std::string filename = + file::JoinPath(TESTDATA_DIR, "eng.unicharset"); UNICHARSET u; LOG(INFO) << "Filename=" << filename; EXPECT_TRUE(u.load_from_file(filename.c_str()));