From 20ed60b31fb306ef364e60810aaad72cbc6c1bf0 Mon Sep 17 00:00:00 2001
From: Shree Devi Kumar <shreeshrii@gmail.com>
Date: Fri, 18 Jan 2019 16:41:29 +0000
Subject: [PATCH] Fix unicharset_test

---
 unittest/Makefile.am        | 10 ++++++++++
 unittest/unichar_test.cc    | 18 +++++++++++++++---
 unittest/unicharset_test.cc | 31 +++++++++++++++++++++++--------
 3 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/unittest/Makefile.am b/unittest/Makefile.am
index 359bbdb11f..192c65597e 100644
--- a/unittest/Makefile.am
+++ b/unittest/Makefile.am
@@ -8,10 +8,12 @@ TESSDATA_DIR=$(shell cd $(top_srcdir) && cd .. && pwd)/tessdata
 # Absolute path of directory 'testing' with test images and ground truth texts
 # (using submodule test).
 TESTING_DIR=$(shell cd $(top_srcdir) && pwd)/test/testing
+TESTDATA_DIR=$(shell cd $(top_srcdir) && pwd)/test/testdata
 
 AM_CPPFLAGS += -DLANGDATA_DIR="\"$(LANGDATA_DIR)\""
 AM_CPPFLAGS += -DTESSDATA_DIR="\"$(TESSDATA_DIR)\""
 AM_CPPFLAGS += -DTESTING_DIR="\"$(TESTING_DIR)\""
+AM_CPPFLAGS += -DTESTDATA_DIR="\"$(TESTDATA_DIR)\""
 AM_CPPFLAGS += -DPANGO_ENABLE_ENGINE
 AM_CPPFLAGS += -I$(top_builddir)/src/api
 AM_CPPFLAGS += -I$(top_srcdir)/src/api
@@ -123,6 +125,8 @@ check_PROGRAMS = \
 
 if ENABLE_TRAINING
 check_PROGRAMS += commandlineflags_test
+check_PROGRAMS += unichar_test
+check_PROGRAMS += unicharset_test
 check_PROGRAMS += validator_test
 endif
 
@@ -235,6 +239,12 @@ tabvector_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 tfile_test_SOURCES = tfile_test.cc
 tfile_test_LDADD = $(GTEST_LIBS) $(TESS_LIBS)
 
+unichar_test_SOURCES = unichar_test.cc
+unichar_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
+
+unicharset_test_SOURCES = unicharset_test.cc
+unicharset_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
+
 validator_test_SOURCES = validator_test.cc
 validator_test_LDADD = $(GTEST_LIBS) $(TRAINING_LIBS) $(TESS_LIBS) $(ICU_UC_LIBS)
 
diff --git a/unittest/unichar_test.cc b/unittest/unichar_test.cc
index 3829c30504..c78afd2c93 100644
--- a/unittest/unichar_test.cc
+++ b/unittest/unichar_test.cc
@@ -1,4 +1,16 @@
-#include "tesseract/ccutil/unichar.h"
+// (C) Copyright 2017, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "unichar.h"
+#include "include_gunit.h"
 
 using tesseract::UNICHAR;
 
@@ -13,7 +25,7 @@ TEST(UnicharTest, Conversion) {
   // Check for round-trip conversion.
   std::vector<char32> utf32 = UNICHAR::UTF8ToUTF32(kUTF8Src);
   EXPECT_THAT(utf32, testing::ElementsAreArray(kUTF32Src));
-  string utf8 = UNICHAR::UTF32ToUTF8(utf32);
+  std::string utf8 = UNICHAR::UTF32ToUTF8(utf32);
   EXPECT_STREQ(kUTF8Src, utf8.c_str());
 }
 
@@ -25,7 +37,7 @@ TEST(UnicharTest, InvalidText) {
   std::vector<char32> utf32 = UNICHAR::UTF8ToUTF32(kInvalidUTF8);
   EXPECT_TRUE(utf32.empty());
   // Invalid utf32 produces an empty string.
-  string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32);
+  std::string utf8 = UNICHAR::UTF32ToUTF8(kInvalidUTF32);
   EXPECT_TRUE(utf8.empty());
 }
 
diff --git a/unittest/unicharset_test.cc b/unittest/unicharset_test.cc
index 8a5ad0b925..750904da8c 100644
--- a/unittest/unicharset_test.cc
+++ b/unittest/unicharset_test.cc
@@ -1,4 +1,19 @@
-#include "tesseract/ccutil/unicharset.h"
+// (C) Copyright 2017, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <string>
+#include "log.h"                        // for LOG
+#include "unicharset.h"
+#include "gmock/gmock.h"  // for testing::ElementsAreArray
+#include "include_gunit.h"
 
 using testing::ElementsAreArray;
 
@@ -32,7 +47,7 @@ TEST(UnicharsetTest, Basics) {
   std::vector<int> v(&labels[0], &labels[0] + labels.size());
   EXPECT_THAT(v, ElementsAreArray({3, 4, 4, 5, 7, 6}));
   // With the fi ligature encoding fails without a pre-cleanup.
-  string lig_str = "af\ufb01ne";
+  std::string lig_str = "af\ufb01ne";
   EXPECT_FALSE(
       u.encode_string(lig_str.c_str(), true, &labels, nullptr, nullptr));
   lig_str = u.CleanupString(lig_str.c_str());
@@ -62,7 +77,7 @@ TEST(UnicharsetTest, Multibyte) {
   EXPECT_EQ(u.size(), 9);
   EXPECT_EQ(u.unichar_to_id("\u0627"), 3);
   EXPECT_EQ(u.unichar_to_id("\u062c"), 4);
-  // The first two bytes of this string is \u0627, which matches id 3;
+  // The first two bytes of this string are \u0627, which matches id 3;
   EXPECT_EQ(u.unichar_to_id("\u0627\u062c", 2), 3);
   EXPECT_EQ(u.unichar_to_id("\u062f"), 5);
   // Individual f and i are not present, but they are there as a pair.
@@ -79,13 +94,13 @@ TEST(UnicharsetTest, Multibyte) {
   // With the fi ligature the fi is picked out.
   GenericVector<char> lengths;
   int encoded_length;
-  string src_str = "\u0627\u062c\ufb01\u0635\u062b";
+  std::string src_str = "\u0627\u062c\ufb01\u0635\u062b";
   // src_str has to be pre-cleaned for lengths to be correct.
-  string cleaned = u.CleanupString(src_str.c_str());
+  std::string cleaned = u.CleanupString(src_str.c_str());
   EXPECT_TRUE(u.encode_string(cleaned.c_str(), true, &labels, &lengths,
                               &encoded_length));
   EXPECT_EQ(encoded_length, cleaned.size());
-  string len_str(&lengths[0], lengths.size());
+  std::string len_str(&lengths[0], lengths.size());
   EXPECT_STREQ(len_str.c_str(), "\002\002\002\002\002");
   v = std::vector<int>(&labels[0], &labels[0] + labels.size());
   EXPECT_THAT(v, ElementsAreArray({3, 4, 6, 8, 7}));
@@ -128,8 +143,8 @@ TEST(UnicharsetTest, MultibyteBigrams) {
 TEST(UnicharsetTest, OldStyle) {
   // This test verifies an old unicharset that contains fi/fl ligatures loads
   // and keeps all the entries.
-  string filename =
-      file::JoinPath(FLAGS_test_srcdir, "testdata", "eng.unicharset");
+  std::string filename =
+      file::JoinPath(TESTDATA_DIR, "eng.unicharset");
   UNICHARSET u;
   LOG(INFO) << "Filename=" << filename;
   EXPECT_TRUE(u.load_from_file(filename.c_str()));