diff --git a/unittest/apiexample_test.cc b/unittest/apiexample_test.cc index 8a2bfd730f..438b9216ad 100644 --- a/unittest/apiexample_test.cc +++ b/unittest/apiexample_test.cc @@ -2,7 +2,8 @@ // File: apiexample_test.cc // Description: Api Test for Tesseract using text fixtures and parameters. // Tests for Devanagari, Latin and Arabic scripts are disabled by default. -// Disabled tests can be run when required by using the --gtest_also_run_disabled_tests argument. +// Disabled tests can be run when required by using the +// --gtest_also_run_disabled_tests argument. // ./unittest/apiexample_test --gtest_also_run_disabled_tests // // Author: ShreeDevi Kumar @@ -21,91 +22,89 @@ // expects clone of tessdata_fast repo in ../../tessdata_fast //#include "log.h" -#include "include_gunit.h" -#include "baseapi.h" -#include "leptonica/allheaders.h" -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include "baseapi.h" +#include "include_gunit.h" +#include "leptonica/allheaders.h" namespace { class QuickTest : public testing::Test { protected: - virtual void SetUp() { - start_time_ = time(nullptr); - } + virtual void SetUp() { start_time_ = time(nullptr); } virtual void TearDown() { const time_t end_time = time(nullptr); - EXPECT_TRUE(end_time - start_time_ <=55) << "The test took too long - " << ::testing::PrintToString(end_time - start_time_); + EXPECT_TRUE(end_time - start_time_ <= 55) + << "The test took too long - " + << ::testing::PrintToString(end_time - start_time_); } time_t start_time_; - }; +}; - void OCRTester(const char* imgname, const char* groundtruth, const char* tessdatadir, const char* lang) { - //log.info() << tessdatadir << " for language: " << lang << std::endl; - char *outText; - std::locale loc("C"); // You can also use "" for the default system locale - std::ifstream file(groundtruth); - file.imbue(loc); // Use it for file input - std::string gtText((std::istreambuf_iterator(file)), std::istreambuf_iterator()); - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); - ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); - ASSERT_TRUE(image != nullptr) << "Failed to read test image."; - api->SetImage(image); - outText = api->GetUTF8Text(); - EXPECT_EQ(gtText,outText) << "Phototest.tif OCR does not match ground truth for " << ::testing::PrintToString(lang); - api->End(); - delete [] outText; - pixDestroy(&image); - } +void OCRTester(const char* imgname, const char* groundtruth, + const char* tessdatadir, const char* lang) { + // log.info() << tessdatadir << " for language: " << lang << std::endl; + char* outText; + std::locale loc("C"); // You can also use "" for the default system locale + std::ifstream file(groundtruth); + file.imbue(loc); // Use it for file input + std::string gtText((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); + ASSERT_FALSE(api->Init(tessdatadir, lang)) + << "Could not initialize tesseract."; + Pix* image = pixRead(imgname); + ASSERT_TRUE(image != nullptr) << "Failed to read test image."; + api->SetImage(image); + outText = api->GetUTF8Text(); + EXPECT_EQ(gtText, outText) + << "Phototest.tif OCR does not match ground truth for " + << ::testing::PrintToString(lang); + api->End(); + delete[] outText; + pixDestroy(&image); +} - class MatchGroundTruth : public QuickTest , - public ::testing::WithParamInterface { - }; +class MatchGroundTruth : public QuickTest, + public ::testing::WithParamInterface {}; - TEST_P(MatchGroundTruth, FastPhototestOCR) { - OCRTester(TESTING_DIR "/phototest.tif", - TESTING_DIR "/phototest.txt", - TESSDATA_DIR "_fast", GetParam()); - } +TEST_P(MatchGroundTruth, FastPhototestOCR) { + OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", + TESSDATA_DIR "_fast", GetParam()); +} - TEST_P(MatchGroundTruth, BestPhototestOCR) { - OCRTester(TESTING_DIR "/phototest.tif", - TESTING_DIR "/phototest.txt", - TESSDATA_DIR "_best", GetParam()); - } +TEST_P(MatchGroundTruth, BestPhototestOCR) { + OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", + TESSDATA_DIR "_best", GetParam()); +} - TEST_P(MatchGroundTruth, TessPhototestOCR) { - OCRTester(TESTING_DIR "/phototest.tif", - TESTING_DIR "/phototest.txt", - TESSDATA_DIR , GetParam()); - } +TEST_P(MatchGroundTruth, TessPhototestOCR) { + OCRTester(TESTING_DIR "/phototest.tif", TESTING_DIR "/phototest.txt", + TESSDATA_DIR, GetParam()); +} - INSTANTIATE_TEST_CASE_P( Eng, MatchGroundTruth, - ::testing::Values("eng") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Latin, MatchGroundTruth, - ::testing::Values("script/Latin") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Deva, MatchGroundTruth, - ::testing::Values("script/Devanagari") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Arabic, MatchGroundTruth, - ::testing::Values("script/Arabic") ); +INSTANTIATE_TEST_CASE_P(Eng, MatchGroundTruth, ::testing::Values("eng")); +INSTANTIATE_TEST_CASE_P(DISABLED_Latin, MatchGroundTruth, + ::testing::Values("script/Latin")); +INSTANTIATE_TEST_CASE_P(DISABLED_Deva, MatchGroundTruth, + ::testing::Values("script/Devanagari")); +INSTANTIATE_TEST_CASE_P(DISABLED_Arabic, MatchGroundTruth, + ::testing::Values("script/Arabic")); - class EuroText : public QuickTest { - }; +class EuroText : public QuickTest {}; - TEST_F(EuroText, FastLatinOCR) { - OCRTester(TESTING_DIR "/eurotext.tif", - TESTING_DIR "/eurotext.txt", - TESSDATA_DIR "_fast", "script/Latin"); - } +TEST_F(EuroText, FastLatinOCR) { + OCRTester(TESTING_DIR "/eurotext.tif", TESTING_DIR "/eurotext.txt", + TESSDATA_DIR "_fast", "script/Latin"); +} - // script/Latin for eurotext.tif does not match groundtruth - // for tessdata & tessdata_best. - // so do not test these here. +// script/Latin for eurotext.tif does not match groundtruth +// for tessdata & tessdata_best. +// so do not test these here. } // namespace diff --git a/unittest/applybox_test.cc b/unittest/applybox_test.cc index d707fff07d..299c9097d4 100644 --- a/unittest/applybox_test.cc +++ b/unittest/applybox_test.cc @@ -31,19 +31,13 @@ class ApplyBoxTest : public testing::Test { std::string TestDataNameToPath(const std::string& name) { return file::JoinPath(TESTING_DIR, name); } - std::string TessdataPath() { - return TESSDATA_DIR; - } + std::string TessdataPath() { return TESSDATA_DIR; } std::string OutputNameToPath(const std::string& name) { return file::JoinPath(FLAGS_test_tmpdir, name); } - ApplyBoxTest() { - src_pix_ = NULL; - } - ~ApplyBoxTest() { - pixDestroy(&src_pix_); - } + ApplyBoxTest() { src_pix_ = NULL; } + ~ApplyBoxTest() { pixDestroy(&src_pix_); } void SetImage(const char* filename) { pixDestroy(&src_pix_); @@ -70,7 +64,7 @@ class ApplyBoxTest : public testing::Test { api_.Recognize(NULL); char* ocr_text = api_.GetUTF8Text(); EXPECT_STREQ(truth_str, ocr_text); - delete [] ocr_text; + delete[] ocr_text; // Test the boxes by reading the target box file in parallel with the // bounding boxes in the ocr output. std::string box_filename = TestDataNameToPath(target_box_file); @@ -80,22 +74,21 @@ class ApplyBoxTest : public testing::Test { ResultIterator* it = api_.GetIterator(); do { int left, top, right, bottom; - EXPECT_TRUE(it->BoundingBox(tesseract::RIL_SYMBOL, - &left, &top, &right, &bottom)); - TBOX ocr_box(ICOORD(left, height - bottom), - ICOORD(right, height - top)); + EXPECT_TRUE( + it->BoundingBox(tesseract::RIL_SYMBOL, &left, &top, &right, &bottom)); + TBOX ocr_box(ICOORD(left, height - bottom), ICOORD(right, height - top)); int line_number; TBOX truth_box; STRING box_text; - EXPECT_TRUE(ReadNextBox(0, &line_number, box_file, &box_text, - &truth_box)); + EXPECT_TRUE( + ReadNextBox(0, &line_number, box_file, &box_text, &truth_box)); // Testing for major overlap is a bit weak, but if they all // major overlap successfully, then it has to be fairly close. EXPECT_TRUE(ocr_box.major_overlap(truth_box)); // Also check that the symbol text matches the box text. char* symbol_text = it->GetUTF8Text(tesseract::RIL_SYMBOL); EXPECT_STREQ(box_text.string(), symbol_text); - delete [] symbol_text; + delete[] symbol_text; } while (it->Next(tesseract::RIL_SYMBOL)); delete it; } @@ -107,14 +100,14 @@ class ApplyBoxTest : public testing::Test { // Tests character-level applyboxes on normal Times New Roman. TEST_F(ApplyBoxTest, TimesCharLevel) { - VerifyBoxesAndText("trainingtimes.tif", kTruthTextWords, - "trainingtimes.box", false); + VerifyBoxesAndText("trainingtimes.tif", kTruthTextWords, "trainingtimes.box", + false); } // Tests character-level applyboxes on italic Times New Roman. TEST_F(ApplyBoxTest, ItalicCharLevel) { - VerifyBoxesAndText("trainingital.tif", kTruthTextWords, - "trainingital.box", false); + VerifyBoxesAndText("trainingital.tif", kTruthTextWords, "trainingital.box", + false); } // Tests line-level applyboxes on normal Times New Roman. @@ -125,8 +118,8 @@ TEST_F(ApplyBoxTest, TimesLineLevel) { // Tests line-level applyboxes on italic Times New Roman. TEST_F(ApplyBoxTest, ItalLineLevel) { - VerifyBoxesAndText("trainingitalline.tif", kTruthTextLine, - "trainingital.box", true); + VerifyBoxesAndText("trainingitalline.tif", kTruthTextLine, "trainingital.box", + true); } } // namespace diff --git a/unittest/baseapi_test.cc b/unittest/baseapi_test.cc index 342da9dc6f..ca8b5dcab0 100644 --- a/unittest/baseapi_test.cc +++ b/unittest/baseapi_test.cc @@ -8,8 +8,8 @@ namespace { -using ::testing::HasSubstr; using ::testing::ContainsRegex; +using ::testing::HasSubstr; const char* langs[] = {"eng", "vie", "hin", "ara", NULL}; const char* image_files[] = {"HelloGoogle.tif", "viet.tif", "raaj.tif", @@ -25,7 +25,7 @@ class FriendlyTessBaseAPI : public tesseract::TessBaseAPI { string GetCleanedTextResult(tesseract::TessBaseAPI* tess, Pix* pix) { tess->SetImage(pix); - char *result = tess->GetUTF8Text(); + char* result = tess->GetUTF8Text(); string ocr_result = result; delete[] result; absl::StripAsciiWhitespace(&ocr_result); @@ -36,19 +36,18 @@ string GetCleanedTextResult(tesseract::TessBaseAPI* tess, Pix* pix) { class TesseractTest : public testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessdataPath() { - return file::JoinPath(FLAGS_test_srcdir, - "tessdata"); + return file::JoinPath(FLAGS_test_srcdir, "tessdata"); } }; // Tests that array sizes match their intended size. TEST_F(TesseractTest, ArraySizeTest) { int size = 0; - for (size = 0; kPolyBlockNames[size][0] != '\0'; ++size); + for (size = 0; kPolyBlockNames[size][0] != '\0'; ++size) + ; EXPECT_EQ(size, PT_COUNT); } @@ -58,7 +57,7 @@ TEST_F(TesseractTest, BasicTesseractTest) { string truth_text; string ocr_text; api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY); - Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"), @@ -75,14 +74,14 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) { api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY); api.SetPageSegMode(tesseract::PSM_SINGLE_BLOCK); api.SetVariable("paragraph_debug_level", "3"); - Pix *src_pix = pixRead(TestDataNameToPath("b622.png").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("b622.png").c_str()); CHECK(src_pix); api.SetImage(src_pix); - Boxa* para_boxes = api.GetComponentImages(tesseract::RIL_PARA, - true, NULL, NULL); + Boxa* para_boxes = + api.GetComponentImages(tesseract::RIL_PARA, true, NULL, NULL); EXPECT_TRUE(para_boxes != NULL); - Boxa* block_boxes = api.GetComponentImages(tesseract::RIL_BLOCK, - true, NULL, NULL); + Boxa* block_boxes = + api.GetComponentImages(tesseract::RIL_BLOCK, true, NULL, NULL); EXPECT_TRUE(block_boxes != NULL); // TODO(eger): Get paragraphs out of this page pre-text. EXPECT_GE(boxaGetCount(para_boxes), boxaGetCount(block_boxes)); @@ -96,14 +95,14 @@ TEST_F(TesseractTest, IteratesParagraphsEvenIfNotDetected) { TEST_F(TesseractTest, HOCRWorksWithoutSetInputName) { tesseract::TessBaseAPI api; api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY); - Pix *src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("HelloGoogle.tif").c_str()); CHECK(src_pix); api.SetImage(src_pix); - char *result = api.GetHOCRText(0); + char* result = api.GetHOCRText(0); EXPECT_TRUE(result != NULL); EXPECT_THAT(result, HasSubstr("Hello")); EXPECT_THAT(result, HasSubstr("
]* " "baseline [-.0-9]+ [-.0-9]+")); - delete [] result; + delete[] result; pixDestroy(&src_pix); } @@ -131,13 +130,13 @@ TEST_F(TesseractTest, HOCRContainsBaseline) { TEST_F(TesseractTest, RickSnyderNotFuckSnyder) { tesseract::TessBaseAPI api; api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_TESSERACT_ONLY); - Pix *src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("rick_snyder.jpeg").c_str()); CHECK(src_pix); api.SetImage(src_pix); - char *result = api.GetHOCRText(0); + char* result = api.GetHOCRText(0); EXPECT_TRUE(result != NULL); EXPECT_THAT(result, Not(HasSubstr("FUCK"))); - delete [] result; + delete[] result; pixDestroy(&src_pix); } @@ -146,19 +145,12 @@ TEST_F(TesseractTest, AdaptToWordStrTest) { static const char* kTrainingPages[] = { "136.tif", "256.tif", "410.tif", "432.tif", "540.tif", "692.tif", "779.tif", "793.tif", "808.tif", "815.tif", - "12.tif", "12.tif", NULL - }; + "12.tif", "12.tif", NULL}; static const char* kTrainingText[] = { - "1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0", - "6 9 2", "7 7 9", "7 9 3", "8 0 8", "8 1 5", - "1 2", "1 2", NULL - }; - static const char* kTestPages[] = { - "324.tif", "433.tif", "12.tif", NULL - }; - static const char* kTestText[] = { - "324", "433", "12", NULL - }; + "1 3 6", "2 5 6", "4 1 0", "4 3 2", "5 4 0", "6 9 2", "7 7 9", + "7 9 3", "8 0 8", "8 1 5", "1 2", "1 2", NULL}; + static const char* kTestPages[] = {"324.tif", "433.tif", "12.tif", NULL}; + static const char* kTestText[] = {"324", "433", "12", NULL}; tesseract::TessBaseAPI api; string truth_text; string ocr_text; @@ -168,20 +160,20 @@ TEST_F(TesseractTest, AdaptToWordStrTest) { // Train on the training text. for (int i = 0; kTrainingPages[i] != NULL; ++i) { string image_file = TestDataNameToPath(kTrainingPages[i]); - Pix *src_pix = pixRead(image_file.c_str()); + Pix* src_pix = pixRead(image_file.c_str()); CHECK(src_pix); api.SetImage(src_pix); - EXPECT_TRUE(api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, - kTrainingText[i])) - << "Failed to adapt to text \"" << kTrainingText[i] - << "\" on image " << image_file; + EXPECT_TRUE( + api.AdaptToWordStr(tesseract::PSM_SINGLE_WORD, kTrainingText[i])) + << "Failed to adapt to text \"" << kTrainingText[i] << "\" on image " + << image_file; pixDestroy(&src_pix); } // Test the test text. api.SetVariable("tess_bn_matching", "1"); api.SetPageSegMode(tesseract::PSM_SINGLE_WORD); for (int i = 0; kTestPages[i] != NULL; ++i) { - Pix *src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str()); + Pix* src_pix = pixRead(TestDataNameToPath(kTestPages[i]).c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); absl::StripAsciiWhitespace(&truth_text); @@ -196,7 +188,7 @@ TEST_F(TesseractTest, BasicLSTMTest) { string truth_text; string ocr_text; api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY); - Pix *src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("phototest_2.tif").c_str()); CHECK(src_pix); ocr_text = GetCleanedTextResult(&api, src_pix); CHECK_OK(file::GetContents(TestDataNameToPath("phototest.gold.txt"), @@ -213,22 +205,22 @@ TEST_F(TesseractTest, BasicLSTMTest) { // errors due to float/int conversions (e.g., see OUTLINE::move() in // ccstruct/poutline.h) Instead, we do a loose check. TEST_F(TesseractTest, LSTMGeometryTest) { - Pix *src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("deslant.tif").c_str()); FriendlyTessBaseAPI api; api.Init(TessdataPath().c_str(), "eng", tesseract::OEM_LSTM_ONLY); api.SetImage(src_pix); ASSERT_EQ(api.Recognize(NULL), 0); - const PAGE_RES *page_res = api.GetPageRes(); - PAGE_RES_IT page_res_it(const_cast(page_res)); + const PAGE_RES* page_res = api.GetPageRes(); + PAGE_RES_IT page_res_it(const_cast(page_res)); page_res_it.restart_page(); BLOCK* block = page_res_it.block()->block; CHECK(block); // extract word and character boxes for each word - for (page_res_it.restart_page(); page_res_it.word () != NULL; + for (page_res_it.restart_page(); page_res_it.word() != NULL; page_res_it.forward()) { - WERD_RES *word = page_res_it.word(); + WERD_RES* word = page_res_it.word(); CHECK(word); CHECK(word->best_choice); CHECK_GT(word->best_choice->length(), 0); @@ -255,13 +247,13 @@ TEST_F(TesseractTest, LSTMGeometryTest) { TEST_F(TesseractTest, InitConfigOnlyTest) { // Languages for testing initialization. - const char* langs[] = { "eng", "chi_tra", "jpn", "vie", "hin"}; + const char* langs[] = {"eng", "chi_tra", "jpn", "vie", "hin"}; std::unique_ptr api; CycleTimer timer; for (int i = 0; i < ARRAYSIZE(langs); ++i) { api.reset(new tesseract::TessBaseAPI); timer.Restart(); - EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i] , + EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i], tesseract::OEM_TESSERACT_ONLY)); timer.Stop(); LOG(INFO) << "Lang " << langs[i] << " took " << timer.GetInMs() @@ -275,9 +267,9 @@ TEST_F(TesseractTest, InitConfigOnlyTest) { for (int i = 0; i < ARRAYSIZE(langs); ++i) { api.reset(new tesseract::TessBaseAPI); timer.Restart(); - EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i] , - tesseract::OEM_TESSERACT_ONLY, NULL, 0, - &vars_vec, &vars_values, false)); + EXPECT_EQ(0, api->Init(TessdataPath().c_str(), langs[i], + tesseract::OEM_TESSERACT_ONLY, NULL, 0, &vars_vec, + &vars_values, false)); timer.Stop(); LOG(INFO) << "Lang " << langs[i] << " took " << timer.GetInMs() << "ms in config-only init"; @@ -294,15 +286,13 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) { int num_langs = 0; while (langs[num_langs] != NULL) ++num_langs; - const string kTessdataPath = file::JoinPath( - FLAGS_test_srcdir,"tessdata"); + const string kTessdataPath = file::JoinPath(FLAGS_test_srcdir, "tessdata"); // Preload images and verify that OCR is correct on them individually. - std::vector pix(num_langs); + std::vector pix(num_langs); for (int i = 0; i < num_langs; ++i) { SCOPED_TRACE(absl::StrCat("Single instance test with lang = ", langs[i])); - string path = FLAGS_test_srcdir - + "/testdata/" + image_files[i]; + string path = FLAGS_test_srcdir + "/testdata/" + image_files[i]; pix[i] = pixRead(path.c_str()); QCHECK(pix[i] != NULL) << "Could not read " << path; @@ -329,32 +319,30 @@ TEST(TesseractInstanceTest, TestMultipleTessInstances) { } } - for (int i = 0; i < num_langs; ++i) - pixDestroy(&pix[i]); + for (int i = 0; i < num_langs; ++i) pixDestroy(&pix[i]); } // Tests whether Tesseract parameters are correctly set for the two instances. TEST(TesseractInstanceTest, TestMultipleTessInstanceVariables) { string illegal_name = "an_illegal_name"; - string langs[2] = { "eng", "hin" }; + string langs[2] = {"eng", "hin"}; string int_param_name = "tessedit_pageseg_mode"; - int int_param[2] = { 1, 2 }; - string int_param_str[2] = { "1", "2" }; + int int_param[2] = {1, 2}; + string int_param_str[2] = {"1", "2"}; string bool_param_name = "tessedit_ambigs_training"; - bool bool_param[2] = { false, true }; - string bool_param_str[2] = { "F", "T" }; + bool bool_param[2] = {false, true}; + string bool_param_str[2] = {"F", "T"}; string str_param_name = "tessedit_char_blacklist"; - string str_param[2] = { "abc", "def" }; + string str_param[2] = {"abc", "def"}; string double_param_name = "segment_penalty_dict_frequent_word"; - string double_param_str[2] = { "0.01", "2" }; - double double_param[2] = { 0.01, 2 }; + string double_param_str[2] = {"0.01", "2"}; + double double_param[2] = {0.01, 2}; - const string kTessdataPath = file::JoinPath( - FLAGS_test_srcdir,"tessdata"); + const string kTessdataPath = file::JoinPath(FLAGS_test_srcdir, "tessdata"); tesseract::TessBaseAPI tess1, tess2; for (int i = 0; i < 2; ++i) { - tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2; + tesseract::TessBaseAPI* api = (i == 0) ? &tess1 : &tess2; api->Init(kTessdataPath.c_str(), langs[i].c_str()); api->SetVariable(illegal_name.c_str(), "none"); api->SetVariable(int_param_name.c_str(), int_param_str[i].c_str()); @@ -363,7 +351,7 @@ TEST(TesseractInstanceTest, TestMultipleTessInstanceVariables) { api->SetVariable(double_param_name.c_str(), double_param_str[i].c_str()); } for (int i = 0; i < 2; ++i) { - tesseract::TessBaseAPI *api = (i == 0) ? &tess1 : &tess2; + tesseract::TessBaseAPI* api = (i == 0) ? &tess1 : &tess2; EXPECT_FALSE(api->GetStringVariable(illegal_name.c_str())); int intvar; EXPECT_TRUE(api->GetIntVariable(int_param_name.c_str(), &intvar)); diff --git a/unittest/baseapi_thread_test.cc b/unittest/baseapi_thread_test.cc index aafe4a2fca..267daa79b7 100644 --- a/unittest/baseapi_thread_test.cc +++ b/unittest/baseapi_thread_test.cc @@ -37,18 +37,16 @@ using tesseract::TessBaseAPI; namespace { -const char* kTessLangs[] = { "eng", "vie", NULL }; -const char* kTessImages[] = { "HelloGoogle.tif", "viet.tif", NULL }; -const char* kTessTruthText[] = { "Hello Google", "\x74\x69\xe1\xba\xbf\x6e\x67", - NULL }; +const char* kTessLangs[] = {"eng", "vie", NULL}; +const char* kTessImages[] = {"HelloGoogle.tif", "viet.tif", NULL}; +const char* kTessTruthText[] = {"Hello Google", "\x74\x69\xe1\xba\xbf\x6e\x67", + NULL}; -const char* kCubeLangs[] = { "hin", "ara", NULL }; -const char* kCubeImages[] = { "raaj.tif", "arabic.tif", NULL}; +const char* kCubeLangs[] = {"hin", "ara", NULL}; +const char* kCubeImages[] = {"raaj.tif", "arabic.tif", NULL}; const char* kCubeTruthText[] = { - "\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c", - "\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", - NULL}; - + "\xe0\xa4\xb0\xe0\xa4\xbe\xe0\xa4\x9c", + "\xd8\xa7\xd9\x84\xd8\xb9\xd8\xb1\xd8\xa8\xd9\x8a", NULL}; class BaseapiThreadTest : public ::testing::Test { protected: @@ -85,16 +83,16 @@ class BaseapiThreadTest : public ::testing::Test { // and so entirely disallow concurrent access of a Pix instance. const int n = num_langs_ * FLAGS_reps; for (int i = 0; i < n; ++i) { - string path = FLAGS_test_srcdir + - "/testdata/" + - image_files[i % num_langs_]; + string path = + FLAGS_test_srcdir + "/testdata/" + image_files[i % num_langs_]; Pix* new_pix = pixRead(path.c_str()); QCHECK(new_pix != NULL) << "Could not read " << path; pix_.push_back(new_pix); } - pool_size_ = (FLAGS_max_concurrent_instances < 1) ? - num_langs_ * FLAGS_reps : FLAGS_max_concurrent_instances; + pool_size_ = (FLAGS_max_concurrent_instances < 1) + ? num_langs_ * FLAGS_reps + : FLAGS_max_concurrent_instances; } static void TearDownTestCase() { @@ -108,9 +106,7 @@ class BaseapiThreadTest : public ::testing::Test { pool_->StartWorkers(); } - void WaitForPoolWorkers() { - pool_.reset(NULL); - } + void WaitForPoolWorkers() { pool_.reset(NULL); } std::unique_ptr pool_; static int pool_size_; @@ -127,25 +123,23 @@ std::vector BaseapiThreadTest::langs_; std::vector BaseapiThreadTest::gt_text_; int BaseapiThreadTest::num_langs_; - void InitTessInstance(TessBaseAPI* tess, const string& lang) { CHECK(tess != nullptr); - const string kTessdataPath = file::JoinPath( - FLAGS_test_srcdir, "tessdata"); + const string kTessdataPath = file::JoinPath(FLAGS_test_srcdir, "tessdata"); EXPECT_EQ(0, tess->Init(kTessdataPath.c_str(), lang.c_str())); } void GetCleanedText(TessBaseAPI* tess, Pix* pix, string* ocr_text) { tess->SetImage(pix); - char *result = tess->GetUTF8Text(); + char* result = tess->GetUTF8Text(); *ocr_text = result; delete[] result; absl::StripAsciiWhitespace(ocr_text); } void VerifyTextResult(TessBaseAPI* tess, Pix* pix, const string& lang, - const string& expected_text) { - TessBaseAPI *tess_local = NULL; + const string& expected_text) { + TessBaseAPI* tess_local = NULL; if (tess) { tess_local = tess; } else { @@ -155,11 +149,9 @@ void VerifyTextResult(TessBaseAPI* tess, Pix* pix, const string& lang, string ocr_text; GetCleanedText(tess_local, pix, &ocr_text); EXPECT_STREQ(expected_text.c_str(), ocr_text.c_str()); - if (tess_local != tess) - delete tess_local; + if (tess_local != tess) delete tess_local; } - // Check that Tesseract/Cube produce the correct results in single-threaded // operation. If not, it is pointless to run the real multi-threaded tests. TEST_F(BaseapiThreadTest, TestBasicSanity) { diff --git a/unittest/bitvector_test.cc b/unittest/bitvector_test.cc index d37be39b0c..77a95e7785 100644 --- a/unittest/bitvector_test.cc +++ b/unittest/bitvector_test.cc @@ -34,13 +34,11 @@ class BitVectorTest : public testing::Test { TestAll(*map, false); map->SetBit(2); // Set all the odds to true. - for (int i = 3; i <= kPrimeLimit; i += 2) - map->SetValue(i, true); + for (int i = 3; i <= kPrimeLimit; i += 2) map->SetValue(i, true); int factor_limit = static_cast(sqrt(1.0 + kPrimeLimit)); for (int f = 3; f <= factor_limit; f += 2) { if (map->At(f)) { - for (int m = 2; m * f <= kPrimeLimit; ++m) - map->ResetBit(f * m); + for (int m = 2; m * f <= kPrimeLimit; ++m) map->ResetBit(f * m); } } } diff --git a/unittest/cleanapi_test.cc b/unittest/cleanapi_test.cc index 4904a6de69..3efd386ca2 100644 --- a/unittest/cleanapi_test.cc +++ b/unittest/cleanapi_test.cc @@ -16,25 +16,14 @@ // If this test fails to compile, clean up the includes in baseapi.h! // They are not supposed to drag in definitions of any of the tesseract // types included in this enum! -enum NameTester { - ABORT, - OKAY, - LOG, - BLOB, - ELIST, - TBOX, - TPOINT, - WORD -}; +enum NameTester { ABORT, OKAY, LOG, BLOB, ELIST, TBOX, TPOINT, WORD }; -#define ERRCODE_H // avoid redefinition of ABORT in errcode.h +#define ERRCODE_H // avoid redefinition of ABORT in errcode.h #include "include_gunit.h" namespace { // Verifies that the global namespace is clean. -TEST(CleanNamespaceTess, DummyTest) { - tesseract::TessBaseAPI api; -} +TEST(CleanNamespaceTess, DummyTest) { tesseract::TessBaseAPI api; } } // namespace. diff --git a/unittest/colpartition_test.cc b/unittest/colpartition_test.cc index 16bddcbaa5..140f06c294 100644 --- a/unittest/colpartition_test.cc +++ b/unittest/colpartition_test.cc @@ -27,11 +27,9 @@ class TestableColPartition : public ColPartition { class ColPartitionTest : public testing::Test { protected: - void SetUp() { - } + void SetUp() {} - void TearDown() { - } + void TearDown() {} }; TEST_F(ColPartitionTest, IsInSameColumnAsReflexive) { @@ -75,4 +73,4 @@ TEST_F(ColPartitionTest, IsInSameColumnAsPartialOverlap) { EXPECT_TRUE(b.IsInSameColumnAs(a)); } -} // namespace +} // namespace diff --git a/unittest/commandlineflags_test.cc b/unittest/commandlineflags_test.cc index cc97eed007..da391e30c1 100644 --- a/unittest/commandlineflags_test.cc +++ b/unittest/commandlineflags_test.cc @@ -41,10 +41,9 @@ class CommandlineflagsTest : public ::testing::Test { } }; - TEST_F(CommandlineflagsTest, RemoveFlags) { - const char* const_argv[] = { "Progname", "--foo_int", "3", - "file1.h", "file2.h" }; + const char* const_argv[] = {"Progname", "--foo_int", "3", "file1.h", + "file2.h"}; int argc = ARRAYSIZE(const_argv); char** argv = const_cast(const_argv); tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true); @@ -56,7 +55,7 @@ TEST_F(CommandlineflagsTest, RemoveFlags) { EXPECT_STREQ("file2.h", argv[2]); } -#if 0 // TODO: this test needs an update (it currently fails). +#if 0 // TODO: this test needs an update (it currently fails). TEST_F(CommandlineflagsTest, PrintUsageAndExit) { const char* argv[] = { "Progname", "--help" }; EXPECT_EXIT(TestParser("Progname [flags]", ARRAYSIZE(argv), argv), @@ -66,66 +65,65 @@ TEST_F(CommandlineflagsTest, PrintUsageAndExit) { #endif TEST_F(CommandlineflagsTest, ExitsWithErrorOnInvalidFlag) { - const char* argv[] = { "", "--test_nonexistent_flag" }; - EXPECT_EXIT(TestParser(ARRAYSIZE(argv), argv), - ::testing::ExitedWithCode(1), + const char* argv[] = {"", "--test_nonexistent_flag"}; + EXPECT_EXIT(TestParser(ARRAYSIZE(argv), argv), ::testing::ExitedWithCode(1), "ERROR: Non-existent flag"); } TEST_F(CommandlineflagsTest, ParseIntegerFlags) { - const char* argv[] = { "", "--foo_int=3", "--bar_int", "-4" }; + const char* argv[] = {"", "--foo_int=3", "--bar_int", "-4"}; TestParser(ARRAYSIZE(argv), argv); EXPECT_EQ(3, FLAGS_foo_int); EXPECT_EQ(-4, FLAGS_bar_int); - const char* arg_no_value[] = { "", "--bar_int" }; + const char* arg_no_value[] = {"", "--bar_int"}; EXPECT_EXIT(TestParser(ARRAYSIZE(arg_no_value), arg_no_value), ::testing::ExitedWithCode(1), "ERROR"); - const char* arg_invalid_value[] = { "", "--bar_int", "--foo_int=3" }; + const char* arg_invalid_value[] = {"", "--bar_int", "--foo_int=3"}; EXPECT_EXIT(TestParser(ARRAYSIZE(arg_invalid_value), arg_invalid_value), ::testing::ExitedWithCode(1), "ERROR"); - const char* arg_bad_format[] = { "", "--bar_int=" }; + const char* arg_bad_format[] = {"", "--bar_int="}; EXPECT_EXIT(TestParser(ARRAYSIZE(arg_bad_format), arg_bad_format), ::testing::ExitedWithCode(1), "ERROR"); } TEST_F(CommandlineflagsTest, ParseDoubleFlags) { - const char* argv[] = { "", "--foo_double=3.14", "--bar_double", "1.2" }; + const char* argv[] = {"", "--foo_double=3.14", "--bar_double", "1.2"}; TestParser(ARRAYSIZE(argv), argv); EXPECT_EQ(3.14, FLAGS_foo_double); EXPECT_EQ(1.2, FLAGS_bar_double); - const char* arg_no_value[] = { "", "--bar_double" }; - EXPECT_EXIT(TestParser(2, arg_no_value), - ::testing::ExitedWithCode(1), "ERROR"); + const char* arg_no_value[] = {"", "--bar_double"}; + EXPECT_EXIT(TestParser(2, arg_no_value), ::testing::ExitedWithCode(1), + "ERROR"); - const char* arg_bad_format[] = { "", "--bar_double=" }; - EXPECT_EXIT(TestParser(2, arg_bad_format), - ::testing::ExitedWithCode(1), "ERROR"); + const char* arg_bad_format[] = {"", "--bar_double="}; + EXPECT_EXIT(TestParser(2, arg_bad_format), ::testing::ExitedWithCode(1), + "ERROR"); } TEST_F(CommandlineflagsTest, ParseStringFlags) { - const char* argv[] = { "", "--foo_string=abc", "--bar_string", "def" }; + const char* argv[] = {"", "--foo_string=abc", "--bar_string", "def"}; TestParser(ARRAYSIZE(argv), argv); EXPECT_STREQ("abc", FLAGS_foo_string.c_str()); EXPECT_STREQ("def", FLAGS_bar_string.c_str()); - const char* arg_no_value[] = { "", "--bar_string" }; - EXPECT_EXIT(TestParser(2, arg_no_value), - ::testing::ExitedWithCode(1), "ERROR"); + const char* arg_no_value[] = {"", "--bar_string"}; + EXPECT_EXIT(TestParser(2, arg_no_value), ::testing::ExitedWithCode(1), + "ERROR"); FLAGS_bar_string.set_value("bar"); - const char* arg_empty_string[] = { "", "--bar_string=" }; + const char* arg_empty_string[] = {"", "--bar_string="}; TestParser(2, arg_empty_string); EXPECT_STREQ("", FLAGS_bar_string.c_str()); } TEST_F(CommandlineflagsTest, ParseBoolFlags) { - const char* argv[] = { "", "--foo_bool=true", "--bar_bool=1" }; + const char* argv[] = {"", "--foo_bool=true", "--bar_bool=1"}; FLAGS_foo_bool.set_value(false); FLAGS_bar_bool.set_value(false); TestParser(ARRAYSIZE(argv), argv); @@ -133,7 +131,7 @@ TEST_F(CommandlineflagsTest, ParseBoolFlags) { EXPECT_TRUE(FLAGS_foo_bool); EXPECT_TRUE(FLAGS_bar_bool); - const char* inv_argv[] = { "", "--foo_bool=false", "--bar_bool=0" }; + const char* inv_argv[] = {"", "--foo_bool=false", "--bar_bool=0"}; FLAGS_foo_bool.set_value(true); FLAGS_bar_bool.set_value(true); TestParser(3, inv_argv); @@ -141,19 +139,19 @@ TEST_F(CommandlineflagsTest, ParseBoolFlags) { EXPECT_FALSE(FLAGS_foo_bool); EXPECT_FALSE(FLAGS_bar_bool); - const char* arg_implied_true[] = { "", "--bar_bool" }; + const char* arg_implied_true[] = {"", "--bar_bool"}; FLAGS_bar_bool.set_value(false); TestParser(2, arg_implied_true); EXPECT_TRUE(FLAGS_bar_bool); - const char* arg_missing_val[] = { "", "--bar_bool=" }; - EXPECT_EXIT(TestParser(2, arg_missing_val), - ::testing::ExitedWithCode(1), "ERROR"); + const char* arg_missing_val[] = {"", "--bar_bool="}; + EXPECT_EXIT(TestParser(2, arg_missing_val), ::testing::ExitedWithCode(1), + "ERROR"); } TEST_F(CommandlineflagsTest, ParseOldFlags) { EXPECT_STREQ("", FLAGS_q.c_str()); - const char* argv[] = { "", "-q", "text" }; + const char* argv[] = {"", "-q", "text"}; TestParser(ARRAYSIZE(argv), argv); EXPECT_STREQ("text", FLAGS_q.c_str()); } diff --git a/unittest/dawg_test.cc b/unittest/dawg_test.cc index 039ac93461..dacc01d34c 100644 --- a/unittest/dawg_test.cc +++ b/unittest/dawg_test.cc @@ -5,20 +5,20 @@ #include "util/process/subprocess.h" -#include "tesseract/dict/trie.h" -#include "tesseract/ccutil/unicharset.h" #include "tesseract/ccstruct/ratngs.h" +#include "tesseract/ccutil/unicharset.h" +#include "tesseract/dict/trie.h" namespace { -void RemoveTrailingLineTerminators(char *line) { - char *end = line + strlen(line) - 1; +void RemoveTrailingLineTerminators(char* line) { + char* end = line + strlen(line) - 1; while (end >= line && ('\n' == *end || '\r' == *end)) { *end-- = 0; } } -void AddLineToSet(std::set *words, char *line) { +void AddLineToSet(std::set* words, char* line) { RemoveTrailingLineTerminators(line); words->insert(line); } @@ -27,7 +27,7 @@ void AddLineToSet(std::set *words, char *line) { // aka Directed Acyclic Word Graphs). class DawgTest : public testing::Test { protected: - void LoadWordlist(const string &filename, std::set *words) const { + void LoadWordlist(const string& filename, std::set* words) const { FileLineReader::Options options; options.set_comment_char(0); FileLineReader flr(filename.c_str(), options); @@ -35,8 +35,7 @@ class DawgTest : public testing::Test { flr.Reload(); } string TestDataNameToPath(const string& name) const { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessBinaryPath(const string& binary_name) const { return file::JoinPath(FLAGS_test_srcdir, @@ -44,10 +43,8 @@ class DawgTest : public testing::Test { string OutputNameToPath(const string& name) const { return file::JoinPath(FLAGS_test_tmpdir, name); } - int RunCommand(const string &program, - const string &arg1, - const string &arg2, - const string &arg3) const { + int RunCommand(const string& program, const string& arg1, const string& arg2, + const string& arg3) const { SubProcess p; std::vector argv; argv.push_back(program); @@ -62,8 +59,8 @@ class DawgTest : public testing::Test { // Test that we are able to convert a wordlist file (one "word" per line) to // a dawg (a compressed format) and then extract the original wordlist back // out using the tools "wordlist2dawg" and "dawg2wordlist." - void TestDawgRoundTrip(const string &unicharset_filename, - const string &wordlist_filename) const { + void TestDawgRoundTrip(const string& unicharset_filename, + const string& wordlist_filename) const { std::set orig_words, roundtrip_words; string unicharset = TestDataNameToPath(unicharset_filename); string orig_wordlist = TestDataNameToPath(wordlist_filename); @@ -71,8 +68,7 @@ class DawgTest : public testing::Test { string output_wordlist = OutputNameToPath(wordlist_filename); LoadWordlist(orig_wordlist, &orig_words); EXPECT_EQ( - RunCommand("wordlist2dawg", orig_wordlist, output_dawg, unicharset), - 0); + RunCommand("wordlist2dawg", orig_wordlist, output_dawg, unicharset), 0); EXPECT_EQ( RunCommand("dawg2wordlist", unicharset, output_dawg, output_wordlist), 0); diff --git a/unittest/denorm_test.cc b/unittest/denorm_test.cc index b6da992ae8..a941e1b40d 100644 --- a/unittest/denorm_test.cc +++ b/unittest/denorm_test.cc @@ -18,11 +18,9 @@ namespace { class DENORMTest : public testing::Test { public: - void SetUp() { - } + void SetUp() {} - void TearDown() { - } + void TearDown() {} void ExpectCorrectTransform(const DENORM& denorm, const TPOINT& src, const TPOINT& result, bool local) { @@ -47,8 +45,7 @@ class DENORMTest : public testing::Test { // Tests a simple baseline-style normalization. TEST_F(DENORMTest, NoRotations) { DENORM denorm; - denorm.SetupNormalization(NULL, NULL, NULL, - 1000.0f, 2000.0f, 2.0f, 3.0f, + denorm.SetupNormalization(NULL, NULL, NULL, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f, static_cast(kBlnBaselineOffset)); TPOINT pt1(1100, 2000); TPOINT result1(200, kBlnBaselineOffset); @@ -64,9 +61,8 @@ TEST_F(DENORMTest, NoRotations) { TEST_F(DENORMTest, WithRotations) { DENORM denorm; FCOORD rotation90(0.0f, 1.0f); - denorm.SetupNormalization(NULL, &rotation90, NULL, - 1000.0f, 2000.0f, 2.0f, 3.0f, - 0.0f, static_cast(kBlnBaselineOffset)); + denorm.SetupNormalization(NULL, &rotation90, NULL, 1000.0f, 2000.0f, 2.0f, + 3.0f, 0.0f, static_cast(kBlnBaselineOffset)); TPOINT pt1(1100, 2000); TPOINT result1(0, 200 + kBlnBaselineOffset); @@ -81,14 +77,13 @@ TEST_F(DENORMTest, WithRotations) { // Tests a simple baseline-style normalization with a second rotation & scale. TEST_F(DENORMTest, Multiple) { DENORM denorm; - denorm.SetupNormalization(NULL, NULL, NULL, - 1000.0f, 2000.0f, 2.0f, 3.0f, + denorm.SetupNormalization(NULL, NULL, NULL, 1000.0f, 2000.0f, 2.0f, 3.0f, 0.0f, static_cast(kBlnBaselineOffset)); DENORM denorm2; FCOORD rotation90(0.0f, 1.0f); - denorm2.SetupNormalization(NULL, &rotation90, &denorm, - 128.0f, 128.0f, 0.5f, 0.25f, 0.0f, 0.0f); + denorm2.SetupNormalization(NULL, &rotation90, &denorm, 128.0f, 128.0f, 0.5f, + 0.25f, 0.0f, 0.0f); TPOINT pt1(1050, 2000); TPOINT result1(100, kBlnBaselineOffset); ExpectCorrectTransform(denorm, pt1, result1, true); diff --git a/unittest/equationdetect_test.cc b/unittest/equationdetect_test.cc index 8d564daaa2..e03ff95829 100644 --- a/unittest/equationdetect_test.cc +++ b/unittest/equationdetect_test.cc @@ -13,9 +13,8 @@ namespace tesseract { class TestableEquationDetect : public EquationDetect { public: - TestableEquationDetect(const char* tessdata, - Tesseract* lang_tesseract) : - EquationDetect(tessdata, "equ") { + TestableEquationDetect(const char* tessdata, Tesseract* lang_tesseract) + : EquationDetect(tessdata, "equ") { SetLangTesseract(lang_tesseract); } @@ -26,46 +25,44 @@ class TestableEquationDetect : public EquationDetect { CHECK_LE(math_blobs + digit_blobs, total_blobs); int count = 0; for (int i = 0; i < math_blobs; i++, count++) { - BLOBNBOX *blob = new BLOBNBOX(); + BLOBNBOX* blob = new BLOBNBOX(); blob->set_special_text_type(BSTT_MATH); part->AddBox(blob); } for (int i = 0; i < digit_blobs; i++, count++) { - BLOBNBOX *blob = new BLOBNBOX(); + BLOBNBOX* blob = new BLOBNBOX(); blob->set_special_text_type(BSTT_DIGIT); part->AddBox(blob); } for (int i = count; i < total_blobs; i++) { - BLOBNBOX *blob = new BLOBNBOX(); + BLOBNBOX* blob = new BLOBNBOX(); blob->set_special_text_type(BSTT_NONE); part->AddBox(blob); } } // Set up pix_binary for lang_tesseract_. - void SetPixBinary(Pix *pix) { + void SetPixBinary(Pix* pix) { CHECK_EQ(1, pixGetDepth(pix)); *(lang_tesseract_->mutable_pix_binary()) = pix; } - void RunIdentifySpecialText(BLOBNBOX*blob, const int height_th) { + void RunIdentifySpecialText(BLOBNBOX* blob, const int height_th) { IdentifySpecialText(blob, height_th); } - BlobSpecialTextType RunEstimateTypeForUnichar( - const char*val) { + BlobSpecialTextType RunEstimateTypeForUnichar(const char* val) { const UNICHARSET& unicharset = lang_tesseract_->unicharset; return EstimateTypeForUnichar(unicharset, unicharset.unichar_to_id(val)); } - EquationDetect::IndentType RunIsIndented( - ColPartitionGrid* part_grid, ColPartition* part) { + EquationDetect::IndentType RunIsIndented(ColPartitionGrid* part_grid, + ColPartition* part) { this->part_grid_ = part_grid; return IsIndented(part); } - bool RunIsNearSmallNeighbor(const TBOX& seed_box, - const TBOX& part_box) { + bool RunIsNearSmallNeighbor(const TBOX& seed_box, const TBOX& part_box) { return IsNearSmallNeighbor(seed_box, part_box); } @@ -108,17 +105,15 @@ class EquationFinderTest : public testing::Test { string testdata_dir_; void SetUp() { - string tessdata_dir = file::JoinPath( - FLAGS_test_srcdir, "tessdata"); + string tessdata_dir = file::JoinPath(FLAGS_test_srcdir, "tessdata"); tesseract_.reset(new Tesseract()); tesseract_->init_tesseract(tessdata_dir.c_str(), "eng", OEM_TESSERACT_ONLY); tesseract_->set_source_resolution(300); - equation_det_.reset(new TestableEquationDetect( - tessdata_dir.c_str(), tesseract_.get())); + equation_det_.reset( + new TestableEquationDetect(tessdata_dir.c_str(), tesseract_.get())); equation_det_->SetResolution(300); - testdata_dir_ = file::JoinPath( - FLAGS_test_srcdir, "testdata"); + testdata_dir_ = file::JoinPath(FLAGS_test_srcdir, "testdata"); } void TearDown() { @@ -127,19 +122,17 @@ class EquationFinderTest : public testing::Test { } // Add a BLOCK covering the whole page. - void AddPageBlock(Pix* pix, - BLOCK_LIST* blocks) { + void AddPageBlock(Pix* pix, BLOCK_LIST* blocks) { CHECK(pix != nullptr); CHECK(blocks != nullptr); BLOCK_IT block_it(blocks); - BLOCK* block = new BLOCK("", TRUE, 0, 0, 0, 0, - pixGetWidth(pix), pixGetHeight(pix)); + BLOCK* block = + new BLOCK("", TRUE, 0, 0, 0, 0, pixGetWidth(pix), pixGetHeight(pix)); block_it.add_to_end(block); } // Create col partitions, add into part_grid, and put them into all_parts. - void CreateColParts(const int rows, - const int cols, + void CreateColParts(const int rows, const int cols, ColPartitionGrid* part_grid, std::vector* all_parts) { const int kWidth = 10, kHeight = 10; @@ -148,8 +141,8 @@ class EquationFinderTest : public testing::Test { for (int x = 0; x < cols; ++x) { int left = x * kWidth * 2, bottom = y * kHeight * 2; TBOX box(left, bottom, left + kWidth, bottom + kHeight); - ColPartition* part = ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, + BRT_TEXT, BTFT_NONE); part_grid->InsertBBox(true, true, part); all_parts->push_back(part); } @@ -159,15 +152,14 @@ class EquationFinderTest : public testing::Test { void ClearParts(std::vector* all_parts) { for (int i = 0; i < all_parts->size(); ++i) { (*all_parts)[i]->DeleteBoxes(); - delete((*all_parts)[i]); + delete ((*all_parts)[i]); } } // Create a BLOBNBOX object with bounding box tbox, and add it into part. - void AddBlobIntoPart(const TBOX& tbox, - ColPartition* part) { + void AddBlobIntoPart(const TBOX& tbox, ColPartition* part) { CHECK(part != nullptr); - BLOBNBOX *blob = new BLOBNBOX(); + BLOBNBOX* blob = new BLOBNBOX(); blob->set_bounding_box(tbox); part->AddBox(blob); } @@ -176,7 +168,7 @@ class EquationFinderTest : public testing::Test { TEST_F(EquationFinderTest, IdentifySpecialText) { // Load Image. string imagefile = file::JoinPath(testdata_dir_, "equ_gt1.tif"); - Pix *pix_binary = pixRead(imagefile.c_str()); + Pix* pix_binary = pixRead(imagefile.c_str()); CHECK(pix_binary != NULL && pixGetDepth(pix_binary) == 1); // Get components. @@ -251,24 +243,24 @@ TEST_F(EquationFinderTest, IsIndented) { // // part 5: ******** TBOX box1(0, 950, 999, 999); - ColPartition* part1 = ColPartition::FakePartition( - box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part1 = + ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part_grid.InsertBBox(true, true, part1); TBOX box2(300, 920, 900, 940); - ColPartition* part2 = ColPartition::FakePartition( - box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part2 = + ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part_grid.InsertBBox(true, true, part2); TBOX box3(0, 900, 600, 910); - ColPartition* part3 = ColPartition::FakePartition( - box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part3 = + ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part_grid.InsertBBox(true, true, part3); TBOX box4(300, 890, 600, 899); - ColPartition* part4 = ColPartition::FakePartition( - box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part4 = + ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part_grid.InsertBBox(true, true, part4); TBOX box5(300, 500, 900, 510); - ColPartition* part5 = ColPartition::FakePartition( - box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part5 = + ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part_grid.InsertBBox(true, true, part5); // Test @@ -290,15 +282,15 @@ TEST_F(EquationFinderTest, IsIndented) { // Release memory. part1->DeleteBoxes(); - delete(part1); + delete (part1); part2->DeleteBoxes(); - delete(part2); + delete (part2); part3->DeleteBoxes(); - delete(part3); + delete (part3); part4->DeleteBoxes(); - delete(part4); + delete (part4); part5->DeleteBoxes(); - delete(part5); + delete (part5); } TEST_F(EquationFinderTest, IsNearSmallNeighbor) { @@ -332,14 +324,14 @@ TEST_F(EquationFinderTest, IsNearSmallNeighbor) { TEST_F(EquationFinderTest, CheckSeedBlobsCount) { TBOX box(0, 950, 999, 999); - ColPartition* part1 = ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); - ColPartition* part2= ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); - ColPartition* part3 = ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); - ColPartition* part4 = ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part1 = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part2 = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part3 = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part4 = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); // Part 1: 8 math, 0 digit, 20 total. equation_det_->AddMathDigitBlobs(8, 0, 20, part1); @@ -359,19 +351,19 @@ TEST_F(EquationFinderTest, CheckSeedBlobsCount) { // Release memory. part1->DeleteBoxes(); - delete(part1); + delete (part1); part2->DeleteBoxes(); - delete(part2); + delete (part2); part3->DeleteBoxes(); - delete(part3); + delete (part3); part4->DeleteBoxes(); - delete(part4); + delete (part4); } TEST_F(EquationFinderTest, ComputeForegroundDensity) { // Create the pix with top half foreground, bottom half background. int width = 1024, height = 768; - Pix *pix = pixCreate(width, height, 1); + Pix* pix = pixCreate(width, height, 1); pixRasterop(pix, 0, 0, width, height / 2, PIX_SET, NULL, 0, 0); TBOX box1(100, 0, 140, 140), box2(100, height / 2 - 20, 140, height / 2 + 20), box3(100, height - 40, 140, height); @@ -414,20 +406,20 @@ TEST_F(EquationFinderTest, ComputeCPsSuperBBox) { ColPartitionGrid part_grid(10, ICOORD(0, 0), ICOORD(1000, 1000)); TBOX box1(0, 0, 999, 99); - ColPartition* part1 = ColPartition::FakePartition( - box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part1 = + ColPartition::FakePartition(box1, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); TBOX box2(0, 100, 499, 199); - ColPartition* part2 = ColPartition::FakePartition( - box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part2 = + ColPartition::FakePartition(box2, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); TBOX box3(500, 100, 999, 199); - ColPartition* part3 = ColPartition::FakePartition( - box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part3 = + ColPartition::FakePartition(box3, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); TBOX box4(0, 200, 999, 299); - ColPartition* part4 = ColPartition::FakePartition( - box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part4 = + ColPartition::FakePartition(box4, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); TBOX box5(0, 900, 999, 999); - ColPartition* part5 = ColPartition::FakePartition( - box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part5 = + ColPartition::FakePartition(box5, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); // Add part1->part3 into part_grid and test. part_grid.InsertBBox(true, true, part1); @@ -448,21 +440,21 @@ TEST_F(EquationFinderTest, ComputeCPsSuperBBox) { // Release memory. part1->DeleteBoxes(); - delete(part1); + delete (part1); part2->DeleteBoxes(); - delete(part2); + delete (part2); part3->DeleteBoxes(); - delete(part3); + delete (part3); part4->DeleteBoxes(); - delete(part4); + delete (part4); part5->DeleteBoxes(); - delete(part5); + delete (part5); } TEST_F(EquationFinderTest, SplitCPHorLite) { TBOX box(0, 0, 999, 99); - ColPartition* part = ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part->DeleteBoxes(); part->set_median_width(10); GenericVector splitted_boxes; @@ -491,13 +483,13 @@ TEST_F(EquationFinderTest, SplitCPHorLite) { EXPECT_TRUE(TBOX(500, 0, 540, 35) == splitted_boxes[2]); part->DeleteBoxes(); - delete(part); + delete (part); } TEST_F(EquationFinderTest, SplitCPHor) { TBOX box(0, 0, 999, 99); - ColPartition* part = ColPartition::FakePartition( - box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); + ColPartition* part = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part->DeleteBoxes(); part->set_median_width(10); GenericVector parts_splitted; @@ -528,7 +520,7 @@ TEST_F(EquationFinderTest, SplitCPHor) { parts_splitted.delete_data_pointers(); part->DeleteBoxes(); - delete(part); + delete (part); } } // namespace tesseract diff --git a/unittest/fileio_test.cc b/unittest/fileio_test.cc index 9a7f7ea04a..19c51bdc14 100644 --- a/unittest/fileio_test.cc +++ b/unittest/fileio_test.cc @@ -18,8 +18,7 @@ TEST(FileTest, JoinPath) { TEST(OutputBufferTest, WriteString) { const int kMaxBufSize = 128; char buffer[kMaxBufSize]; - for (int i = 0; i < kMaxBufSize; ++i) - buffer[i] = '\0'; + for (int i = 0; i < kMaxBufSize; ++i) buffer[i] = '\0'; FILE* fp = fmemopen(buffer, kMaxBufSize, "w"); CHECK(fp != nullptr); diff --git a/unittest/heap_test.cc b/unittest/heap_test.cc index 6e49e22544..130bf1c848 100644 --- a/unittest/heap_test.cc +++ b/unittest/heap_test.cc @@ -21,12 +21,12 @@ namespace tesseract { -int test_data[] = { 8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}; +int test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0}; // The fixture for testing GenericHeap and DoublePtr. class HeapTest : public testing::Test { public: - virtual ~HeapTest(); + virtual ~HeapTest(); // Pushes the test data onto both the heap and the KDVector. void PushTestData(GenericHeap* heap, KDVector* v) { for (int i = 0; i < ARRAYSIZE(test_data); ++i) { diff --git a/unittest/imagedata_test.cc b/unittest/imagedata_test.cc index 83ac62833b..7abda8e0db 100644 --- a/unittest/imagedata_test.cc +++ b/unittest/imagedata_test.cc @@ -12,8 +12,7 @@ namespace { class ImagedataTest : public ::testing::Test { protected: - ImagedataTest() { - } + ImagedataTest() {} // Creates a fake DocumentData, writes it to a file, and returns the filename. string MakeFakeDoc(int num_pages, int doc_id, @@ -51,7 +50,7 @@ TEST_F(ImagedataTest, CachesProperly) { // Allowances to read the document. Big enough for 1, 3, 0, all pages. const int kMemoryAllowances[] = {2000000, 4000000, 1000000, 100000000, 0}; // Order in which to read the pages, with some sequential and some seeks. - const int kPageReadOrder[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, 11, 10, 9, -1 }; + const int kPageReadOrder[] = {0, 1, 2, 3, 8, 4, 5, 6, 7, 11, 10, 9, -1}; std::vector page_texts; string filename = MakeFakeDoc(kNumPages, 0, &page_texts); diff --git a/unittest/include_gunit.h b/unittest/include_gunit.h index f4e0773167..4bccf37e22 100644 --- a/unittest/include_gunit.h +++ b/unittest/include_gunit.h @@ -13,14 +13,13 @@ #ifndef TESSERACT_UNITTEST_INCLUDE_GUNIT_H_ #define TESSERACT_UNITTEST_INCLUDE_GUNIT_H_ +#include "errcode.h" // for ASSERT_HOST +#include "fileio.h" // for tesseract::File #include "gtest/gtest.h" -#include "errcode.h" // for ASSERT_HOST -#include "fileio.h" // for tesseract::File const char* FLAGS_test_tmpdir = "."; -class file: public tesseract::File { -}; +class file : public tesseract::File {}; #define ABSL_ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0])) #define ARRAYSIZE(arr) (sizeof(arr) / sizeof(arr[0])) diff --git a/unittest/indexmapbidi_test.cc b/unittest/indexmapbidi_test.cc index 8e170faadf..74bde1a6eb 100644 --- a/unittest/indexmapbidi_test.cc +++ b/unittest/indexmapbidi_test.cc @@ -34,13 +34,11 @@ class IndexMapBiDiTest : public testing::Test { map->Init(kPrimeLimit + 1, false); map->SetMap(2, true); // Set all the odds to true. - for (int i = 3; i <= kPrimeLimit; i += 2) - map->SetMap(i, true); + for (int i = 3; i <= kPrimeLimit; i += 2) map->SetMap(i, true); int factor_limit = static_cast(sqrt(1.0 + kPrimeLimit)); for (int f = 3; f <= factor_limit; f += 2) { if (map->SparseToCompact(f) >= 0) { - for (int m = 2; m * f <= kPrimeLimit; ++m) - map->SetMap(f * m, false); + for (int m = 2; m * f <= kPrimeLimit; ++m) map->SetMap(f * m, false); } } map->Setup(); diff --git a/unittest/intfeaturemap_test.cc b/unittest/intfeaturemap_test.cc index a941e12ecb..fd59cbe2bc 100644 --- a/unittest/intfeaturemap_test.cc +++ b/unittest/intfeaturemap_test.cc @@ -74,8 +74,8 @@ TEST_F(IntFeatureMapTest, Exhaustive) { int dtheta = kIntFeatureExtent / kThetaBuckets + 1; int bad_offsets = 0; for (int index = 0; index < total_buckets; ++index) { - for (int dir = -tesseract::kNumOffsetMaps; - dir <= tesseract::kNumOffsetMaps; ++dir) { + for (int dir = -tesseract::kNumOffsetMaps; dir <= tesseract::kNumOffsetMaps; + ++dir) { int offset_index = map.OffsetFeature(index, dir); if (dir == 0) { EXPECT_EQ(index, offset_index); diff --git a/unittest/lang_model_test.cc b/unittest/lang_model_test.cc index 3374b1ff60..7a577c3aaa 100644 --- a/unittest/lang_model_test.cc +++ b/unittest/lang_model_test.cc @@ -7,8 +7,7 @@ namespace tesseract { namespace { string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata", name); + return file::JoinPath(FLAGS_test_srcdir, "testdata", name); } // This is an integration test that verifies that CombineLangModel works to diff --git a/unittest/layout_test.cc b/unittest/layout_test.cc index faf68d603b..f63883cc26 100644 --- a/unittest/layout_test.cc +++ b/unittest/layout_test.cc @@ -13,35 +13,27 @@ namespace { using tesseract::MutableIterator; -using tesseract::ResultIterator; using tesseract::PageIteratorLevel; +using tesseract::ResultIterator; const char* kStrings8087_054[] = { - "dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", NULL -}; -const PolyBlockType kBlocks8087_054[] = { - PT_HEADING_TEXT, PT_FLOWING_TEXT, PT_PULLOUT_IMAGE, - PT_CAPTION_TEXT, PT_FLOWING_TEXT -}; + "dat", "Dalmatian", "", "DAMAGED DURING", "margarine,", NULL}; +const PolyBlockType kBlocks8087_054[] = {PT_HEADING_TEXT, PT_FLOWING_TEXT, + PT_PULLOUT_IMAGE, PT_CAPTION_TEXT, + PT_FLOWING_TEXT}; // The fixture for testing Tesseract. class LayoutTest : public testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessdataPath() { - return file::JoinPath(FLAGS_test_srcdir, - "tessdata"); + return file::JoinPath(FLAGS_test_srcdir, "tessdata"); } - LayoutTest() { - src_pix_ = NULL; - } - ~LayoutTest() { - pixDestroy(&src_pix_); - } + LayoutTest() { src_pix_ = NULL; } + ~LayoutTest() { pixDestroy(&src_pix_); } void SetImage(const char* filename, const char* lang) { pixDestroy(&src_pix_); @@ -56,16 +48,14 @@ class LayoutTest : public testing::Test { // allowing for other blocks in between. // An empty string should match an image block, and a NULL string // indicates the end of the array. - void VerifyBlockTextOrder(const char* strings[], - const PolyBlockType* blocks, + void VerifyBlockTextOrder(const char* strings[], const PolyBlockType* blocks, ResultIterator* it) { it->Begin(); int string_index = 0; int block_index = 0; do { char* block_text = it->GetUTF8Text(tesseract::RIL_BLOCK); - if (block_text != NULL && - it->BlockType() == blocks[string_index] && + if (block_text != NULL && it->BlockType() == blocks[string_index] && strstr(block_text, strings[string_index]) != NULL) { VLOG(1) << StringPrintf("Found string %s in block %d of type %s", strings[string_index], block_index, @@ -74,19 +64,18 @@ class LayoutTest : public testing::Test { ++string_index; } else if (it->BlockType() == blocks[string_index] && block_text == NULL && strings[string_index][0] == '\0') { - VLOG(1) << StringPrintf("Found block of type %s at block %d", - kPolyBlockNames[blocks[string_index]], - block_index); - // Found this one. - ++string_index; + VLOG(1) << StringPrintf("Found block of type %s at block %d", + kPolyBlockNames[blocks[string_index]], + block_index); + // Found this one. + ++string_index; } else { VLOG(1) << StringPrintf("No match found in block with text:\n%s", block_text); } - delete [] block_text; + delete[] block_text; ++block_index; - if (strings[string_index] == NULL) - break; + if (strings[string_index] == NULL) break; } while (it->Next(tesseract::RIL_BLOCK)); EXPECT_TRUE(strings[string_index] == NULL); } @@ -105,8 +94,8 @@ class LayoutTest : public testing::Test { do { int left, top, right, bottom; if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom) && - PTIsTextType(it->BlockType()) && - right - left > 800 && bottom - top > 200) { + PTIsTextType(it->BlockType()) && right - left > 800 && + bottom - top > 200) { if (prev_right > prev_left) { if (min(right, prev_right) > max(left, prev_left)) { EXPECT_GE(top, prev_bottom) << "Overlapping block should be below"; @@ -134,8 +123,8 @@ class LayoutTest : public testing::Test { do { int left, top, right, bottom; if (it->BoundingBox(tesseract::RIL_BLOCK, &left, &top, &right, &bottom) && - PTIsTextType(it->BlockType()) && - right - left > 800 && bottom - top > 200 ) { + PTIsTextType(it->BlockType()) && right - left > 800 && + bottom - top > 200) { const PAGE_RES_IT* pr_it = it->PageResIt(); POLY_BLOCK* pb = pr_it->block()->block->poly_block(); CHECK(pb != NULL); @@ -148,11 +137,11 @@ class LayoutTest : public testing::Test { const PAGE_RES_IT* w_it = word_it.PageResIt(); // Iterate the blobs in the word. C_BLOB_IT b_it(w_it->word()->word->cblob_list()); - for (b_it.mark_cycle_pt();!b_it.cycled_list(); b_it.forward()) { + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { C_BLOB* blob = b_it.data(); // Iterate the outlines in the blob. C_OUTLINE_IT ol_it(blob->out_list()); - for (ol_it.mark_cycle_pt();!ol_it.cycled_list(); ol_it.forward()) { + for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { C_OUTLINE* ol = ol_it.data(); TBOX box = ol->bounding_box(); ICOORD middle((box.left() + box.right()) / 2, diff --git a/unittest/ligature_table_test.cc b/unittest/ligature_table_test.cc index 736c785ac4..6f068df09c 100644 --- a/unittest/ligature_table_test.cc +++ b/unittest/ligature_table_test.cc @@ -25,13 +25,10 @@ const char kRenderableEngLigatureText[] = "fidelity effigy ſteep"; class LigatureTableTest : public ::testing::Test { protected: static void SetUpTestCase() { - FLAGS_fonts_dir = File::JoinPath( - FLAGS_test_srcdir, "testdata"); + FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata"); FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir; } - void SetUp() { - lig_table_ = LigatureTable::Get(); - } + void SetUp() { lig_table_ = LigatureTable::Get(); } LigatureTable* lig_table_; }; @@ -66,11 +63,8 @@ TEST_F(LigatureTableTest, DoesRemoveLigatures) { TEST_F(LigatureTableTest, TestCustomLigatures) { const char* kTestCases[] = { - "act", "a\uE003", - "publiſh", "publi\uE006", - "ſince", "\uE007nce", - "aſleep", "a\uE008eep", - "neceſſary", "nece\uE009ary", + "act", "a\uE003", "publiſh", "publi\uE006", "ſince", + "\uE007nce", "aſleep", "a\uE008eep", "neceſſary", "nece\uE009ary", }; for (int i = 0; i < ARRAYSIZE(kTestCases); i += 2) { EXPECT_STREQ(kTestCases[i + 1], @@ -84,7 +78,9 @@ TEST_F(LigatureTableTest, TestCustomLigatures) { TEST_F(LigatureTableTest, TestRemovesCustomLigatures) { const char* kTestCases[] = { - "fiction", "fi\uE003ion", "fiction", + "fiction", + "fi\uE003ion", + "fiction", }; for (int i = 0; i < ARRAYSIZE(kTestCases); i += 3) { EXPECT_STREQ(kTestCases[i + 1], @@ -93,4 +89,4 @@ TEST_F(LigatureTableTest, TestRemovesCustomLigatures) { lig_table_->RemoveCustomLigatures(kTestCases[i + 1]).c_str()); } } -} +} // namespace diff --git a/unittest/linlsq_test.cc b/unittest/linlsq_test.cc index 176637dbf9..7aa6e75875 100644 --- a/unittest/linlsq_test.cc +++ b/unittest/linlsq_test.cc @@ -17,11 +17,9 @@ namespace { class LLSQTest : public testing::Test { public: - void SetUp() { - } + void SetUp() {} - void TearDown() { - } + void TearDown() {} void ExpectCorrectLine(const LLSQ& llsq, double m, double c, double rms, double pearson, double tolerance) { @@ -30,14 +28,14 @@ class LLSQTest : public testing::Test { EXPECT_NEAR(rms, llsq.rms(llsq.m(), llsq.c(llsq.m())), tolerance); EXPECT_NEAR(pearson, llsq.pearson(), tolerance); } - FCOORD PtsMean(const std::vector &pts) { + FCOORD PtsMean(const std::vector& pts) { FCOORD total(0, 0); for (int i = 0; i < pts.size(); i++) { total += pts[i]; } return (pts.size() > 0) ? total / pts.size() : total; } - void VerifyRmsOrth(const std::vector &pts, const FCOORD &orth) { + void VerifyRmsOrth(const std::vector& pts, const FCOORD& orth) { LLSQ llsq; FCOORD xavg = PtsMean(pts); FCOORD nvec = !orth; @@ -79,13 +77,11 @@ TEST_F(LLSQTest, BasicLines) { // The point at 1,2 pulls the result away from what would otherwise be a // perfect fit to a horizontal line by 0.25 unit, with rms error of 0.433. ExpectCorrectLine(llsq, 0.0, 1.25, 0.433, 0.0, 1e-2); - ExpectCorrectVector(llsq, FCOORD(1.0f, 1.25f), - FCOORD(1.0f, 0.0f), 1e-3); + ExpectCorrectVector(llsq, FCOORD(1.0f, 1.25f), FCOORD(1.0f, 0.0f), 1e-3); llsq.add(1.0, 2.0, 10.0); // With a heavy weight, the point at 1,2 pulls the line nearer. ExpectCorrectLine(llsq, 0.0, 1.786, 0.41, 0.0, 1e-2); - ExpectCorrectVector(llsq, FCOORD(1.0f, 1.786f), - FCOORD(1.0f, 0.0f), 1e-3); + ExpectCorrectVector(llsq, FCOORD(1.0f, 1.786f), FCOORD(1.0f, 0.0f), 1e-3); } // Tests a simple baseline-style normalization with a rotation. @@ -93,14 +89,12 @@ TEST_F(LLSQTest, Vectors) { LLSQ llsq; llsq.add(1.0, 1.0); llsq.add(1.0, -1.0); - ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), - FCOORD(0.0f, 1.0f), 1e-6); + ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-6); llsq.add(0.9, -2.0); llsq.add(1.1, -3.0); llsq.add(0.9, 2.0); llsq.add(1.10001, 3.0); - ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), - FCOORD(0.0f, 1.0f), 1e-3); + ExpectCorrectVector(llsq, FCOORD(1.0f, 0.0f), FCOORD(0.0f, 1.0f), 1e-3); } // Verify that rms_orth() actually calculates: @@ -112,10 +106,10 @@ TEST_F(LLSQTest, RmsOrthWorksAsIntended) { pts.push_back(FCOORD(0.13, 0.77)); pts.push_back(FCOORD(0.16, 0.83)); pts.push_back(FCOORD(0.45, 0.79)); - VerifyRmsOrth(pts, FCOORD(1,0)); - VerifyRmsOrth(pts, FCOORD(1,1)); - VerifyRmsOrth(pts, FCOORD(1,2)); - VerifyRmsOrth(pts, FCOORD(2,1)); + VerifyRmsOrth(pts, FCOORD(1, 0)); + VerifyRmsOrth(pts, FCOORD(1, 1)); + VerifyRmsOrth(pts, FCOORD(1, 2)); + VerifyRmsOrth(pts, FCOORD(2, 1)); } } // namespace. diff --git a/unittest/loadlang_test.cc b/unittest/loadlang_test.cc index fc9054fca8..aba2945dc0 100644 --- a/unittest/loadlang_test.cc +++ b/unittest/loadlang_test.cc @@ -2,8 +2,8 @@ // File: loadlang_test.cc // Description: Test loading of All languages and Scripts for Tesseract. // Tests for All languages and scripts are Disabled by default. -// Force the disabled test to run if required by using the --gtest_also_run_disabled_tests argument. -// Author: Shree Devi Kumar +// Force the disabled test to run if required by using the +// --gtest_also_run_disabled_tests argument. Author: Shree Devi Kumar // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,230 +16,235 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// -#include "include_gunit.h" -#include "baseapi.h" #include +#include "baseapi.h" +#include "include_gunit.h" namespace { class QuickTest : public testing::Test { protected: - virtual void SetUp() { - start_time_ = time(nullptr); - } + virtual void SetUp() { start_time_ = time(nullptr); } virtual void TearDown() { const time_t end_time = time(nullptr); - EXPECT_TRUE(end_time - start_time_ <=25) << "The test took too long - " << ::testing::PrintToString(end_time - start_time_); + EXPECT_TRUE(end_time - start_time_ <= 25) + << "The test took too long - " + << ::testing::PrintToString(end_time - start_time_); } time_t start_time_; - }; +}; - void LangLoader(const char* lang, const char* tessdatadir) { - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); - ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract for $lang."; - api->End(); - } +void LangLoader(const char* lang, const char* tessdatadir) { + tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); + ASSERT_FALSE(api->Init(tessdatadir, lang)) + << "Could not initialize tesseract for $lang."; + api->End(); +} // For all languages - class LoadLanguage : public QuickTest , - public ::testing::WithParamInterface { - }; - - TEST_P(LoadLanguage, afr) {LangLoader("afr" , GetParam());} - TEST_P(LoadLanguage, amh) {LangLoader("amh" , GetParam());} - TEST_P(LoadLanguage, ara) {LangLoader("ara" , GetParam());} - TEST_P(LoadLanguage, asm) {LangLoader("asm" , GetParam());} - TEST_P(LoadLanguage, aze) {LangLoader("aze" , GetParam());} - TEST_P(LoadLanguage, aze_cyrl) {LangLoader("aze_cyrl" , GetParam());} - TEST_P(LoadLanguage, bel) {LangLoader("bel" , GetParam());} - TEST_P(LoadLanguage, ben) {LangLoader("ben" , GetParam());} - TEST_P(LoadLanguage, bod) {LangLoader("bod" , GetParam());} - TEST_P(LoadLanguage, bos) {LangLoader("bos" , GetParam());} - TEST_P(LoadLanguage, bre) {LangLoader("bre" , GetParam());} - TEST_P(LoadLanguage, bul) {LangLoader("bul" , GetParam());} - TEST_P(LoadLanguage, cat) {LangLoader("cat" , GetParam());} - TEST_P(LoadLanguage, ceb) {LangLoader("ceb" , GetParam());} - TEST_P(LoadLanguage, ces) {LangLoader("ces" , GetParam());} - TEST_P(LoadLanguage, chi_sim) {LangLoader("chi_sim" , GetParam());} - TEST_P(LoadLanguage, chi_sim_vert) {LangLoader("chi_sim_vert" , GetParam());} - TEST_P(LoadLanguage, chi_tra) {LangLoader("chi_tra" , GetParam());} - TEST_P(LoadLanguage, chi_tra_vert) {LangLoader("chi_tra_vert" , GetParam());} - TEST_P(LoadLanguage, chr) {LangLoader("chr" , GetParam());} - TEST_P(LoadLanguage, cos) {LangLoader("cos" , GetParam());} - TEST_P(LoadLanguage, cym) {LangLoader("cym" , GetParam());} - TEST_P(LoadLanguage, dan) {LangLoader("dan" , GetParam());} - TEST_P(LoadLanguage, deu) {LangLoader("deu" , GetParam());} - TEST_P(LoadLanguage, div) {LangLoader("div" , GetParam());} - TEST_P(LoadLanguage, dzo) {LangLoader("dzo" , GetParam());} - TEST_P(LoadLanguage, ell) {LangLoader("ell" , GetParam());} - TEST_P(LoadLanguage, eng) {LangLoader("eng" , GetParam());} - TEST_P(LoadLanguage, enm) {LangLoader("enm" , GetParam());} - TEST_P(LoadLanguage, epo) {LangLoader("epo" , GetParam());} - TEST_P(LoadLanguage, est) {LangLoader("est" , GetParam());} - TEST_P(LoadLanguage, eus) {LangLoader("eus" , GetParam());} - TEST_P(LoadLanguage, fao) {LangLoader("fao" , GetParam());} - TEST_P(LoadLanguage, fas) {LangLoader("fas" , GetParam());} - TEST_P(LoadLanguage, fil) {LangLoader("fil" , GetParam());} - TEST_P(LoadLanguage, fin) {LangLoader("fin" , GetParam());} - TEST_P(LoadLanguage, fra) {LangLoader("fra" , GetParam());} - TEST_P(LoadLanguage, frk) {LangLoader("frk" , GetParam());} - TEST_P(LoadLanguage, frm) {LangLoader("frm" , GetParam());} - TEST_P(LoadLanguage, fry) {LangLoader("fry" , GetParam());} - TEST_P(LoadLanguage, gla) {LangLoader("gla" , GetParam());} - TEST_P(LoadLanguage, gle) {LangLoader("gle" , GetParam());} - TEST_P(LoadLanguage, glg) {LangLoader("glg" , GetParam());} - TEST_P(LoadLanguage, grc) {LangLoader("grc" , GetParam());} - TEST_P(LoadLanguage, guj) {LangLoader("guj" , GetParam());} - TEST_P(LoadLanguage, hat) {LangLoader("hat" , GetParam());} - TEST_P(LoadLanguage, heb) {LangLoader("heb" , GetParam());} - TEST_P(LoadLanguage, hin) {LangLoader("hin" , GetParam());} - TEST_P(LoadLanguage, hrv) {LangLoader("hrv" , GetParam());} - TEST_P(LoadLanguage, hun) {LangLoader("hun" , GetParam());} - TEST_P(LoadLanguage, hye) {LangLoader("hye" , GetParam());} - TEST_P(LoadLanguage, iku) {LangLoader("iku" , GetParam());} - TEST_P(LoadLanguage, ind) {LangLoader("ind" , GetParam());} - TEST_P(LoadLanguage, isl) {LangLoader("isl" , GetParam());} - TEST_P(LoadLanguage, ita) {LangLoader("ita" , GetParam());} - TEST_P(LoadLanguage, ita_old) {LangLoader("ita_old" , GetParam());} - TEST_P(LoadLanguage, jav) {LangLoader("jav" , GetParam());} - TEST_P(LoadLanguage, jpn) {LangLoader("jpn" , GetParam());} - TEST_P(LoadLanguage, jpn_vert) {LangLoader("jpn_vert" , GetParam());} - TEST_P(LoadLanguage, kan) {LangLoader("kan" , GetParam());} - TEST_P(LoadLanguage, kat) {LangLoader("kat" , GetParam());} - TEST_P(LoadLanguage, kat_old) {LangLoader("kat_old" , GetParam());} - TEST_P(LoadLanguage, kaz) {LangLoader("kaz" , GetParam());} - TEST_P(LoadLanguage, khm) {LangLoader("khm" , GetParam());} - TEST_P(LoadLanguage, kir) {LangLoader("kir" , GetParam());} +class LoadLanguage : public QuickTest, + public ::testing::WithParamInterface {}; + +TEST_P(LoadLanguage, afr) { LangLoader("afr", GetParam()); } +TEST_P(LoadLanguage, amh) { LangLoader("amh", GetParam()); } +TEST_P(LoadLanguage, ara) { LangLoader("ara", GetParam()); } +TEST_P(LoadLanguage, asm) { LangLoader("asm", GetParam()); } +TEST_P(LoadLanguage, aze) { LangLoader("aze", GetParam()); } +TEST_P(LoadLanguage, aze_cyrl) { LangLoader("aze_cyrl", GetParam()); } +TEST_P(LoadLanguage, bel) { LangLoader("bel", GetParam()); } +TEST_P(LoadLanguage, ben) { LangLoader("ben", GetParam()); } +TEST_P(LoadLanguage, bod) { LangLoader("bod", GetParam()); } +TEST_P(LoadLanguage, bos) { LangLoader("bos", GetParam()); } +TEST_P(LoadLanguage, bre) { LangLoader("bre", GetParam()); } +TEST_P(LoadLanguage, bul) { LangLoader("bul", GetParam()); } +TEST_P(LoadLanguage, cat) { LangLoader("cat", GetParam()); } +TEST_P(LoadLanguage, ceb) { LangLoader("ceb", GetParam()); } +TEST_P(LoadLanguage, ces) { LangLoader("ces", GetParam()); } +TEST_P(LoadLanguage, chi_sim) { LangLoader("chi_sim", GetParam()); } +TEST_P(LoadLanguage, chi_sim_vert) { LangLoader("chi_sim_vert", GetParam()); } +TEST_P(LoadLanguage, chi_tra) { LangLoader("chi_tra", GetParam()); } +TEST_P(LoadLanguage, chi_tra_vert) { LangLoader("chi_tra_vert", GetParam()); } +TEST_P(LoadLanguage, chr) { LangLoader("chr", GetParam()); } +TEST_P(LoadLanguage, cos) { LangLoader("cos", GetParam()); } +TEST_P(LoadLanguage, cym) { LangLoader("cym", GetParam()); } +TEST_P(LoadLanguage, dan) { LangLoader("dan", GetParam()); } +TEST_P(LoadLanguage, deu) { LangLoader("deu", GetParam()); } +TEST_P(LoadLanguage, div) { LangLoader("div", GetParam()); } +TEST_P(LoadLanguage, dzo) { LangLoader("dzo", GetParam()); } +TEST_P(LoadLanguage, ell) { LangLoader("ell", GetParam()); } +TEST_P(LoadLanguage, eng) { LangLoader("eng", GetParam()); } +TEST_P(LoadLanguage, enm) { LangLoader("enm", GetParam()); } +TEST_P(LoadLanguage, epo) { LangLoader("epo", GetParam()); } +TEST_P(LoadLanguage, est) { LangLoader("est", GetParam()); } +TEST_P(LoadLanguage, eus) { LangLoader("eus", GetParam()); } +TEST_P(LoadLanguage, fao) { LangLoader("fao", GetParam()); } +TEST_P(LoadLanguage, fas) { LangLoader("fas", GetParam()); } +TEST_P(LoadLanguage, fil) { LangLoader("fil", GetParam()); } +TEST_P(LoadLanguage, fin) { LangLoader("fin", GetParam()); } +TEST_P(LoadLanguage, fra) { LangLoader("fra", GetParam()); } +TEST_P(LoadLanguage, frk) { LangLoader("frk", GetParam()); } +TEST_P(LoadLanguage, frm) { LangLoader("frm", GetParam()); } +TEST_P(LoadLanguage, fry) { LangLoader("fry", GetParam()); } +TEST_P(LoadLanguage, gla) { LangLoader("gla", GetParam()); } +TEST_P(LoadLanguage, gle) { LangLoader("gle", GetParam()); } +TEST_P(LoadLanguage, glg) { LangLoader("glg", GetParam()); } +TEST_P(LoadLanguage, grc) { LangLoader("grc", GetParam()); } +TEST_P(LoadLanguage, guj) { LangLoader("guj", GetParam()); } +TEST_P(LoadLanguage, hat) { LangLoader("hat", GetParam()); } +TEST_P(LoadLanguage, heb) { LangLoader("heb", GetParam()); } +TEST_P(LoadLanguage, hin) { LangLoader("hin", GetParam()); } +TEST_P(LoadLanguage, hrv) { LangLoader("hrv", GetParam()); } +TEST_P(LoadLanguage, hun) { LangLoader("hun", GetParam()); } +TEST_P(LoadLanguage, hye) { LangLoader("hye", GetParam()); } +TEST_P(LoadLanguage, iku) { LangLoader("iku", GetParam()); } +TEST_P(LoadLanguage, ind) { LangLoader("ind", GetParam()); } +TEST_P(LoadLanguage, isl) { LangLoader("isl", GetParam()); } +TEST_P(LoadLanguage, ita) { LangLoader("ita", GetParam()); } +TEST_P(LoadLanguage, ita_old) { LangLoader("ita_old", GetParam()); } +TEST_P(LoadLanguage, jav) { LangLoader("jav", GetParam()); } +TEST_P(LoadLanguage, jpn) { LangLoader("jpn", GetParam()); } +TEST_P(LoadLanguage, jpn_vert) { LangLoader("jpn_vert", GetParam()); } +TEST_P(LoadLanguage, kan) { LangLoader("kan", GetParam()); } +TEST_P(LoadLanguage, kat) { LangLoader("kat", GetParam()); } +TEST_P(LoadLanguage, kat_old) { LangLoader("kat_old", GetParam()); } +TEST_P(LoadLanguage, kaz) { LangLoader("kaz", GetParam()); } +TEST_P(LoadLanguage, khm) { LangLoader("khm", GetParam()); } +TEST_P(LoadLanguage, kir) { LangLoader("kir", GetParam()); } // TEST_P(LoadLanguage, kmr) {LangLoader("kmr" , GetParam());} - TEST_P(LoadLanguage, kor) {LangLoader("kor" , GetParam());} - TEST_P(LoadLanguage, kor_vert) {LangLoader("kor_vert" , GetParam());} - TEST_P(LoadLanguage, lao) {LangLoader("lao" , GetParam());} - TEST_P(LoadLanguage, lat) {LangLoader("lat" , GetParam());} - TEST_P(LoadLanguage, lav) {LangLoader("lav" , GetParam());} - TEST_P(LoadLanguage, lit) {LangLoader("lit" , GetParam());} - TEST_P(LoadLanguage, ltz) {LangLoader("ltz" , GetParam());} - TEST_P(LoadLanguage, mal) {LangLoader("mal" , GetParam());} - TEST_P(LoadLanguage, mar) {LangLoader("mar" , GetParam());} - TEST_P(LoadLanguage, mkd) {LangLoader("mkd" , GetParam());} - TEST_P(LoadLanguage, mlt) {LangLoader("mlt" , GetParam());} - TEST_P(LoadLanguage, mon) {LangLoader("mon" , GetParam());} - TEST_P(LoadLanguage, mri) {LangLoader("mri" , GetParam());} - TEST_P(LoadLanguage, msa) {LangLoader("msa" , GetParam());} - TEST_P(LoadLanguage, mya) {LangLoader("mya" , GetParam());} - TEST_P(LoadLanguage, nep) {LangLoader("nep" , GetParam());} - TEST_P(LoadLanguage, nld) {LangLoader("nld" , GetParam());} - TEST_P(LoadLanguage, nor) {LangLoader("nor" , GetParam());} - TEST_P(LoadLanguage, oci) {LangLoader("oci" , GetParam());} - TEST_P(LoadLanguage, ori) {LangLoader("ori" , GetParam());} - TEST_P(LoadLanguage, osd) {LangLoader("osd" , GetParam());} - TEST_P(LoadLanguage, pan) {LangLoader("pan" , GetParam());} - TEST_P(LoadLanguage, pol) {LangLoader("pol" , GetParam());} - TEST_P(LoadLanguage, por) {LangLoader("por" , GetParam());} - TEST_P(LoadLanguage, pus) {LangLoader("pus" , GetParam());} - TEST_P(LoadLanguage, que) {LangLoader("que" , GetParam());} - TEST_P(LoadLanguage, ron) {LangLoader("ron" , GetParam());} - TEST_P(LoadLanguage, rus) {LangLoader("rus" , GetParam());} - TEST_P(LoadLanguage, san) {LangLoader("san" , GetParam());} - TEST_P(LoadLanguage, sin) {LangLoader("sin" , GetParam());} - TEST_P(LoadLanguage, slk) {LangLoader("slk" , GetParam());} - TEST_P(LoadLanguage, slv) {LangLoader("slv" , GetParam());} - TEST_P(LoadLanguage, snd) {LangLoader("snd" , GetParam());} - TEST_P(LoadLanguage, spa) {LangLoader("spa" , GetParam());} - TEST_P(LoadLanguage, spa_old) {LangLoader("spa_old" , GetParam());} - TEST_P(LoadLanguage, sqi) {LangLoader("sqi" , GetParam());} - TEST_P(LoadLanguage, srp) {LangLoader("srp" , GetParam());} - TEST_P(LoadLanguage, srp_latn) {LangLoader("srp_latn" , GetParam());} - TEST_P(LoadLanguage, sun) {LangLoader("sun" , GetParam());} - TEST_P(LoadLanguage, swa) {LangLoader("swa" , GetParam());} - TEST_P(LoadLanguage, swe) {LangLoader("swe" , GetParam());} - TEST_P(LoadLanguage, syr) {LangLoader("syr" , GetParam());} - TEST_P(LoadLanguage, tam) {LangLoader("tam" , GetParam());} - TEST_P(LoadLanguage, tat) {LangLoader("tat" , GetParam());} - TEST_P(LoadLanguage, tel) {LangLoader("tel" , GetParam());} - TEST_P(LoadLanguage, tgk) {LangLoader("tgk" , GetParam());} - TEST_P(LoadLanguage, tha) {LangLoader("tha" , GetParam());} - TEST_P(LoadLanguage, tir) {LangLoader("tir" , GetParam());} - TEST_P(LoadLanguage, ton) {LangLoader("ton" , GetParam());} - TEST_P(LoadLanguage, tur) {LangLoader("tur" , GetParam());} - TEST_P(LoadLanguage, uig) {LangLoader("uig" , GetParam());} - TEST_P(LoadLanguage, ukr) {LangLoader("ukr" , GetParam());} - TEST_P(LoadLanguage, urd) {LangLoader("urd" , GetParam());} - TEST_P(LoadLanguage, uzb) {LangLoader("uzb" , GetParam());} - TEST_P(LoadLanguage, uzb_cyrl) {LangLoader("uzb_cyrl" , GetParam());} - TEST_P(LoadLanguage, vie) {LangLoader("vie" , GetParam());} - TEST_P(LoadLanguage, yid) {LangLoader("yid" , GetParam());} - TEST_P(LoadLanguage, yor) {LangLoader("yor" , GetParam());} - - INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_fast, LoadLanguage, - ::testing::Values(TESSDATA_DIR "_fast") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_best, LoadLanguage, - ::testing::Values(TESSDATA_DIR "_best") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata, LoadLanguage, - ::testing::Values(TESSDATA_DIR) ); +TEST_P(LoadLanguage, kor) { LangLoader("kor", GetParam()); } +TEST_P(LoadLanguage, kor_vert) { LangLoader("kor_vert", GetParam()); } +TEST_P(LoadLanguage, lao) { LangLoader("lao", GetParam()); } +TEST_P(LoadLanguage, lat) { LangLoader("lat", GetParam()); } +TEST_P(LoadLanguage, lav) { LangLoader("lav", GetParam()); } +TEST_P(LoadLanguage, lit) { LangLoader("lit", GetParam()); } +TEST_P(LoadLanguage, ltz) { LangLoader("ltz", GetParam()); } +TEST_P(LoadLanguage, mal) { LangLoader("mal", GetParam()); } +TEST_P(LoadLanguage, mar) { LangLoader("mar", GetParam()); } +TEST_P(LoadLanguage, mkd) { LangLoader("mkd", GetParam()); } +TEST_P(LoadLanguage, mlt) { LangLoader("mlt", GetParam()); } +TEST_P(LoadLanguage, mon) { LangLoader("mon", GetParam()); } +TEST_P(LoadLanguage, mri) { LangLoader("mri", GetParam()); } +TEST_P(LoadLanguage, msa) { LangLoader("msa", GetParam()); } +TEST_P(LoadLanguage, mya) { LangLoader("mya", GetParam()); } +TEST_P(LoadLanguage, nep) { LangLoader("nep", GetParam()); } +TEST_P(LoadLanguage, nld) { LangLoader("nld", GetParam()); } +TEST_P(LoadLanguage, nor) { LangLoader("nor", GetParam()); } +TEST_P(LoadLanguage, oci) { LangLoader("oci", GetParam()); } +TEST_P(LoadLanguage, ori) { LangLoader("ori", GetParam()); } +TEST_P(LoadLanguage, osd) { LangLoader("osd", GetParam()); } +TEST_P(LoadLanguage, pan) { LangLoader("pan", GetParam()); } +TEST_P(LoadLanguage, pol) { LangLoader("pol", GetParam()); } +TEST_P(LoadLanguage, por) { LangLoader("por", GetParam()); } +TEST_P(LoadLanguage, pus) { LangLoader("pus", GetParam()); } +TEST_P(LoadLanguage, que) { LangLoader("que", GetParam()); } +TEST_P(LoadLanguage, ron) { LangLoader("ron", GetParam()); } +TEST_P(LoadLanguage, rus) { LangLoader("rus", GetParam()); } +TEST_P(LoadLanguage, san) { LangLoader("san", GetParam()); } +TEST_P(LoadLanguage, sin) { LangLoader("sin", GetParam()); } +TEST_P(LoadLanguage, slk) { LangLoader("slk", GetParam()); } +TEST_P(LoadLanguage, slv) { LangLoader("slv", GetParam()); } +TEST_P(LoadLanguage, snd) { LangLoader("snd", GetParam()); } +TEST_P(LoadLanguage, spa) { LangLoader("spa", GetParam()); } +TEST_P(LoadLanguage, spa_old) { LangLoader("spa_old", GetParam()); } +TEST_P(LoadLanguage, sqi) { LangLoader("sqi", GetParam()); } +TEST_P(LoadLanguage, srp) { LangLoader("srp", GetParam()); } +TEST_P(LoadLanguage, srp_latn) { LangLoader("srp_latn", GetParam()); } +TEST_P(LoadLanguage, sun) { LangLoader("sun", GetParam()); } +TEST_P(LoadLanguage, swa) { LangLoader("swa", GetParam()); } +TEST_P(LoadLanguage, swe) { LangLoader("swe", GetParam()); } +TEST_P(LoadLanguage, syr) { LangLoader("syr", GetParam()); } +TEST_P(LoadLanguage, tam) { LangLoader("tam", GetParam()); } +TEST_P(LoadLanguage, tat) { LangLoader("tat", GetParam()); } +TEST_P(LoadLanguage, tel) { LangLoader("tel", GetParam()); } +TEST_P(LoadLanguage, tgk) { LangLoader("tgk", GetParam()); } +TEST_P(LoadLanguage, tha) { LangLoader("tha", GetParam()); } +TEST_P(LoadLanguage, tir) { LangLoader("tir", GetParam()); } +TEST_P(LoadLanguage, ton) { LangLoader("ton", GetParam()); } +TEST_P(LoadLanguage, tur) { LangLoader("tur", GetParam()); } +TEST_P(LoadLanguage, uig) { LangLoader("uig", GetParam()); } +TEST_P(LoadLanguage, ukr) { LangLoader("ukr", GetParam()); } +TEST_P(LoadLanguage, urd) { LangLoader("urd", GetParam()); } +TEST_P(LoadLanguage, uzb) { LangLoader("uzb", GetParam()); } +TEST_P(LoadLanguage, uzb_cyrl) { LangLoader("uzb_cyrl", GetParam()); } +TEST_P(LoadLanguage, vie) { LangLoader("vie", GetParam()); } +TEST_P(LoadLanguage, yid) { LangLoader("yid", GetParam()); } +TEST_P(LoadLanguage, yor) { LangLoader("yor", GetParam()); } + +INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_fast, LoadLanguage, + ::testing::Values(TESSDATA_DIR "_fast")); +INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_best, LoadLanguage, + ::testing::Values(TESSDATA_DIR "_best")); +INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata, LoadLanguage, + ::testing::Values(TESSDATA_DIR)); // For all scripts - class LoadScript : public QuickTest , - public ::testing::WithParamInterface { - }; - - TEST_P(LoadScript, Arabic) {LangLoader("script/Arabic" , GetParam());} - TEST_P(LoadScript, Armenian) {LangLoader("script/Armenian" , GetParam());} - TEST_P(LoadScript, Bengali) {LangLoader("script/Bengali" , GetParam());} - TEST_P(LoadScript, Canadian_Aboriginal) {LangLoader("script/Canadian_Aboriginal" , GetParam());} - TEST_P(LoadScript, Cherokee) {LangLoader("script/Cherokee" , GetParam());} - TEST_P(LoadScript, Cyrillic) {LangLoader("script/Cyrillic" , GetParam());} - TEST_P(LoadScript, Devanagari) {LangLoader("script/Devanagari" , GetParam());} - TEST_P(LoadScript, Ethiopic) {LangLoader("script/Ethiopic" , GetParam());} - TEST_P(LoadScript, Fraktur) {LangLoader("script/Fraktur" , GetParam());} - TEST_P(LoadScript, Georgian) {LangLoader("script/Georgian" , GetParam());} - TEST_P(LoadScript, Greek) {LangLoader("script/Greek" , GetParam());} - TEST_P(LoadScript, Gujarati) {LangLoader("script/Gujarati" , GetParam());} - TEST_P(LoadScript, Gurmukhi) {LangLoader("script/Gurmukhi" , GetParam());} - TEST_P(LoadScript, HanS) {LangLoader("script/HanS" , GetParam());} - TEST_P(LoadScript, HanS_vert) {LangLoader("script/HanS_vert" , GetParam());} - TEST_P(LoadScript, HanT) {LangLoader("script/HanT" , GetParam());} - TEST_P(LoadScript, HanT_vert) {LangLoader("script/HanT_vert" , GetParam());} - TEST_P(LoadScript, Hangul) {LangLoader("script/Hangul" , GetParam());} - TEST_P(LoadScript, Hangul_vert) {LangLoader("script/Hangul_vert" , GetParam());} - TEST_P(LoadScript, Hebrew) {LangLoader("script/Hebrew" , GetParam());} - TEST_P(LoadScript, Japanese) {LangLoader("script/Japanese" , GetParam());} - TEST_P(LoadScript, Japanese_vert) {LangLoader("script/Japanese_vert" , GetParam());} - TEST_P(LoadScript, Kannada) {LangLoader("script/Kannada" , GetParam());} - TEST_P(LoadScript, Khmer) {LangLoader("script/Khmer" , GetParam());} - TEST_P(LoadScript, Lao) {LangLoader("script/Lao" , GetParam());} - TEST_P(LoadScript, Latin) {LangLoader("script/Latin" , GetParam());} - TEST_P(LoadScript, Malayalam) {LangLoader("script/Malayalam" , GetParam());} - TEST_P(LoadScript, Myanmar) {LangLoader("script/Myanmar" , GetParam());} - TEST_P(LoadScript, Oriya) {LangLoader("script/Oriya" , GetParam());} - TEST_P(LoadScript, Sinhala) {LangLoader("script/Sinhala" , GetParam());} - TEST_P(LoadScript, Syriac) {LangLoader("script/Syriac" , GetParam());} - TEST_P(LoadScript, Tamil) {LangLoader("script/Tamil" , GetParam());} - TEST_P(LoadScript, Telugu) {LangLoader("script/Telugu" , GetParam());} - TEST_P(LoadScript, Thaana) {LangLoader("script/Thaana" , GetParam());} - TEST_P(LoadScript, Thai) {LangLoader("script/Thai" , GetParam());} - TEST_P(LoadScript, Tibetan) {LangLoader("script/Tibetan" , GetParam());} - TEST_P(LoadScript, Vietnamese) {LangLoader("script/Vietnamese" , GetParam());} - - INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_fast, LoadScript, - ::testing::Values(TESSDATA_DIR "_fast") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata_best, LoadScript, - ::testing::Values(TESSDATA_DIR "_best") ); - INSTANTIATE_TEST_CASE_P( DISABLED_Tessdata, LoadScript, - ::testing::Values(TESSDATA_DIR) ); - - class LoadLang : public QuickTest { - }; - - // Test Load of English here, as the parameterized tests are disabled by default. - TEST_F(LoadLang, engFast) {LangLoader("eng" , TESSDATA_DIR "_fast");} - TEST_F(LoadLang, engBest) {LangLoader("eng" , TESSDATA_DIR "_best");} - TEST_F(LoadLang, engBestInt) {LangLoader("eng" , TESSDATA_DIR);} - +class LoadScript : public QuickTest, + public ::testing::WithParamInterface {}; + +TEST_P(LoadScript, Arabic) { LangLoader("script/Arabic", GetParam()); } +TEST_P(LoadScript, Armenian) { LangLoader("script/Armenian", GetParam()); } +TEST_P(LoadScript, Bengali) { LangLoader("script/Bengali", GetParam()); } +TEST_P(LoadScript, Canadian_Aboriginal) { + LangLoader("script/Canadian_Aboriginal", GetParam()); +} +TEST_P(LoadScript, Cherokee) { LangLoader("script/Cherokee", GetParam()); } +TEST_P(LoadScript, Cyrillic) { LangLoader("script/Cyrillic", GetParam()); } +TEST_P(LoadScript, Devanagari) { LangLoader("script/Devanagari", GetParam()); } +TEST_P(LoadScript, Ethiopic) { LangLoader("script/Ethiopic", GetParam()); } +TEST_P(LoadScript, Fraktur) { LangLoader("script/Fraktur", GetParam()); } +TEST_P(LoadScript, Georgian) { LangLoader("script/Georgian", GetParam()); } +TEST_P(LoadScript, Greek) { LangLoader("script/Greek", GetParam()); } +TEST_P(LoadScript, Gujarati) { LangLoader("script/Gujarati", GetParam()); } +TEST_P(LoadScript, Gurmukhi) { LangLoader("script/Gurmukhi", GetParam()); } +TEST_P(LoadScript, HanS) { LangLoader("script/HanS", GetParam()); } +TEST_P(LoadScript, HanS_vert) { LangLoader("script/HanS_vert", GetParam()); } +TEST_P(LoadScript, HanT) { LangLoader("script/HanT", GetParam()); } +TEST_P(LoadScript, HanT_vert) { LangLoader("script/HanT_vert", GetParam()); } +TEST_P(LoadScript, Hangul) { LangLoader("script/Hangul", GetParam()); } +TEST_P(LoadScript, Hangul_vert) { + LangLoader("script/Hangul_vert", GetParam()); +} +TEST_P(LoadScript, Hebrew) { LangLoader("script/Hebrew", GetParam()); } +TEST_P(LoadScript, Japanese) { LangLoader("script/Japanese", GetParam()); } +TEST_P(LoadScript, Japanese_vert) { + LangLoader("script/Japanese_vert", GetParam()); +} +TEST_P(LoadScript, Kannada) { LangLoader("script/Kannada", GetParam()); } +TEST_P(LoadScript, Khmer) { LangLoader("script/Khmer", GetParam()); } +TEST_P(LoadScript, Lao) { LangLoader("script/Lao", GetParam()); } +TEST_P(LoadScript, Latin) { LangLoader("script/Latin", GetParam()); } +TEST_P(LoadScript, Malayalam) { LangLoader("script/Malayalam", GetParam()); } +TEST_P(LoadScript, Myanmar) { LangLoader("script/Myanmar", GetParam()); } +TEST_P(LoadScript, Oriya) { LangLoader("script/Oriya", GetParam()); } +TEST_P(LoadScript, Sinhala) { LangLoader("script/Sinhala", GetParam()); } +TEST_P(LoadScript, Syriac) { LangLoader("script/Syriac", GetParam()); } +TEST_P(LoadScript, Tamil) { LangLoader("script/Tamil", GetParam()); } +TEST_P(LoadScript, Telugu) { LangLoader("script/Telugu", GetParam()); } +TEST_P(LoadScript, Thaana) { LangLoader("script/Thaana", GetParam()); } +TEST_P(LoadScript, Thai) { LangLoader("script/Thai", GetParam()); } +TEST_P(LoadScript, Tibetan) { LangLoader("script/Tibetan", GetParam()); } +TEST_P(LoadScript, Vietnamese) { LangLoader("script/Vietnamese", GetParam()); } + +INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_fast, LoadScript, + ::testing::Values(TESSDATA_DIR "_fast")); +INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata_best, LoadScript, + ::testing::Values(TESSDATA_DIR "_best")); +INSTANTIATE_TEST_CASE_P(DISABLED_Tessdata, LoadScript, + ::testing::Values(TESSDATA_DIR)); + +class LoadLang : public QuickTest {}; + +// Test Load of English here, as the parameterized tests are disabled by +// default. +TEST_F(LoadLang, engFast) { LangLoader("eng", TESSDATA_DIR "_fast"); } +TEST_F(LoadLang, engBest) { LangLoader("eng", TESSDATA_DIR "_best"); } +TEST_F(LoadLang, engBestInt) { LangLoader("eng", TESSDATA_DIR); } + // Use class LoadLang for languages which are NOT there in all three repos - TEST_F(LoadLang, kmrFast) {LangLoader("kmr" , TESSDATA_DIR "_fast");} - TEST_F(LoadLang, kmrBest) {LangLoader("kmr" , TESSDATA_DIR "_best");} +TEST_F(LoadLang, kmrFast) { LangLoader("kmr", TESSDATA_DIR "_fast"); } +TEST_F(LoadLang, kmrBest) { LangLoader("kmr", TESSDATA_DIR "_best"); } // TEST_F(LoadLang, kmrBestInt) {LangLoader("kmr" , TESSDATA_DIR);} } // namespace diff --git a/unittest/log.h b/unittest/log.h index 723abb16d4..3efb8585a2 100644 --- a/unittest/log.h +++ b/unittest/log.h @@ -1,7 +1,8 @@ /////////////////////////////////////////////////////////////////////// // File: log.h // Description: Include for custom log message for unittest for tesseract. -// based on //https://stackoverflow.com/questions/16491675/how-to-send-custom-message-in-google-c-testing-framework +// based on +// //https://stackoverflow.com/questions/16491675/how-to-send-custom-message-in-google-c-testing-framework // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,11 +19,13 @@ #include - static class LOG { public: LOG() {} - std::ostream& info() { - std::cout << "[ LOG MSG ] "; - return std::cout; - } +static class LOG { + public: + LOG() {} + std::ostream& info() { + std::cout << "[ LOG MSG ] "; + return std::cout; + } } log; #endif // TESSERACT_UNITTEST_LOG_H_ diff --git a/unittest/lstmtrainer_test.cc b/unittest/lstmtrainer_test.cc index 085ae62b51..872a705cfc 100644 --- a/unittest/lstmtrainer_test.cc +++ b/unittest/lstmtrainer_test.cc @@ -61,9 +61,8 @@ TEST_F(LSTMTrainerTest, ConvertModel) { deu_trainer.InitCharSet(TestDataNameToPath("deu.traineddata")); // Load the fra traineddata, strip out the model, and save to a tmp file. TessdataManager mgr; - string fra_data = file::JoinPath( - FLAGS_test_srcdir, "tessdata_best", - "fra.traineddata"); + string fra_data = + file::JoinPath(FLAGS_test_srcdir, "tessdata_best", "fra.traineddata"); CHECK(mgr.Init(fra_data.c_str())) << "Failed to load " << fra_data; string model_path = file::JoinPath(FLAGS_test_tmpdir, "fra.lstm"); CHECK(mgr.ExtractToFile(model_path.c_str())); @@ -76,7 +75,7 @@ TEST_F(LSTMTrainerTest, ConvertModel) { // baseapi_test.cc). TessBaseAPI api; api.Init(FLAGS_test_tmpdir.c_str(), "deu", tesseract::OEM_LSTM_ONLY); - Pix *src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str()); + Pix* src_pix = pixRead(TestDataNameToPath("phototest.tif").c_str()); CHECK(src_pix); api.SetImage(src_pix); std::unique_ptr result(api.GetUTF8Text()); diff --git a/unittest/mastertrainer_test.cc b/unittest/mastertrainer_test.cc index d3286a72ea..dffaca0bd9 100644 --- a/unittest/mastertrainer_test.cc +++ b/unittest/mastertrainer_test.cc @@ -41,8 +41,9 @@ const int kNumNonReject = 1000; const int kNumCorrect = kNumNonReject - kNumTop1Errs; // The total number of answers is given by the number of non-rejects plus // all the multiple answers. -const int kNumAnswers = kNumNonReject + 2*(kNumTop2Errs - kNumTopNErrs) + - (kNumTop1Errs - kNumTop2Errs) + (kNumTopTopErrs - kNumTop1Errs); +const int kNumAnswers = kNumNonReject + 2 * (kNumTop2Errs - kNumTopNErrs) + + (kNumTop1Errs - kNumTop2Errs) + + (kNumTopTopErrs - kNumTop1Errs); namespace tesseract { @@ -51,7 +52,7 @@ namespace tesseract { class MockClassifier : public ShapeClassifier { public: explicit MockClassifier(ShapeTable* shape_table) - : shape_table_(shape_table), num_done_(0), done_bad_font_(false) { + : shape_table_(shape_table), num_done_(0), done_bad_font_(false) { // Add a false font answer to the shape table. We pick a random unichar_id, // add a new shape for it with a false font. Font must actually exist in // the font table, but not match anything in the first 1000 samples. @@ -108,9 +109,7 @@ class MockClassifier : public ShapeClassifier { return results->size(); } // Provides access to the ShapeTable that this classifier works with. - virtual const ShapeTable* GetShapeTable() const { - return shape_table_; - } + virtual const ShapeTable* GetShapeTable() const { return shape_table_; } private: // Borrowed pointer to the ShapeTable. @@ -140,12 +139,10 @@ const double kMin1lDistance = 0.25; class MasterTrainerTest : public testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessdataPath() { - return file::JoinPath(FLAGS_test_srcdir, - "tessdata"); + return file::JoinPath(FLAGS_test_srcdir, "tessdata"); } string TmpNameToPath(const string& name) { return file::JoinPath(FLAGS_test_tmpdir, name); @@ -169,15 +166,15 @@ class MasterTrainerTest : public testing::Test { FLAGS_X = TestDataNameToPath("eng.xheights"); FLAGS_U = TestDataNameToPath("eng.unicharset"); string tr_file_name(TestDataNameToPath("eng.Arial.exp0.tr")); - const char* argv[] = {tr_file_name.c_str() }; + const char* argv[] = {tr_file_name.c_str()}; int argc = 1; STRING file_prefix; delete master_trainer_; delete shape_table_; shape_table_ = NULL; tessoptind = 0; - master_trainer_ = LoadTrainingData(argc, argv, false, - &shape_table_, &file_prefix); + master_trainer_ = + LoadTrainingData(argc, argv, false, &shape_table_, &file_prefix); EXPECT_TRUE(master_trainer_ != NULL); EXPECT_TRUE(shape_table_ != NULL); } @@ -203,29 +200,29 @@ class MasterTrainerTest : public testing::Test { int shape_1 = shape_table_->FindShape(unichar_1, font_id); EXPECT_GE(shape_1, 0); - float dist_I_l = master_trainer_->ShapeDistance(*shape_table_, - shape_I, shape_l); + float dist_I_l = + master_trainer_->ShapeDistance(*shape_table_, shape_I, shape_l); // No tolerance here. We expect that I and l should match exactly. EXPECT_EQ(0.0f, dist_I_l); - float dist_l_I = master_trainer_->ShapeDistance(*shape_table_, - shape_l, shape_I); + float dist_l_I = + master_trainer_->ShapeDistance(*shape_table_, shape_l, shape_I); // BOTH ways. EXPECT_EQ(0.0f, dist_l_I); // l/1 on the other hand should be distinct. - float dist_l_1 = master_trainer_->ShapeDistance(*shape_table_, - shape_l, shape_1); + float dist_l_1 = + master_trainer_->ShapeDistance(*shape_table_, shape_l, shape_1); EXPECT_GT(dist_l_1, kMin1lDistance); - float dist_1_l = master_trainer_->ShapeDistance(*shape_table_, - shape_1, shape_l); + float dist_1_l = + master_trainer_->ShapeDistance(*shape_table_, shape_1, shape_l); EXPECT_GT(dist_1_l, kMin1lDistance); // So should I/1. - float dist_I_1 = master_trainer_->ShapeDistance(*shape_table_, - shape_I, shape_1); + float dist_I_1 = + master_trainer_->ShapeDistance(*shape_table_, shape_I, shape_1); EXPECT_GT(dist_I_1, kMin1lDistance); - float dist_1_I = master_trainer_->ShapeDistance(*shape_table_, - shape_1, shape_I); + float dist_1_I = + master_trainer_->ShapeDistance(*shape_table_, shape_1, shape_I); EXPECT_GT(dist_1_I, kMin1lDistance); } @@ -249,15 +246,14 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) { LoadMasterTrainer(); // Add the space character to the shape_table_ if not already present to // count junk. - if (shape_table_->FindShape(0, -1) < 0) - shape_table_->AddShape(0, 0); + if (shape_table_->FindShape(0, -1) < 0) shape_table_->AddShape(0, 0); // Make a mock classifier. tesseract::ShapeClassifier* shape_classifier = new tesseract::MockClassifier(shape_table_); // Get the accuracy report. STRING accuracy_report; - master_trainer_->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR, - 0, false, shape_classifier, + master_trainer_->TestClassifierOnSamples(tesseract::CT_UNICHAR_TOP1_ERR, 0, + false, shape_classifier, &accuracy_report); LOG(INFO) << accuracy_report.string(); string result_string = accuracy_report.string(); @@ -287,6 +283,3 @@ TEST_F(MasterTrainerTest, ErrorCounterTest) { } } // namespace. - - - diff --git a/unittest/matrix_test.cc b/unittest/matrix_test.cc index 7d8a7a402c..8a035fe32c 100644 --- a/unittest/matrix_test.cc +++ b/unittest/matrix_test.cc @@ -15,8 +15,8 @@ /////////////////////////////////////////////////////////////////////// #include "matrix.h" -#include "include_gunit.h" #include "genericvector.h" +#include "include_gunit.h" #include "tprintf.h" namespace { diff --git a/unittest/networkio_test.cc b/unittest/networkio_test.cc index a15bc0c651..8efe743a84 100644 --- a/unittest/networkio_test.cc +++ b/unittest/networkio_test.cc @@ -1,10 +1,10 @@ #include "tesseract/lstm/networkio.h" #include "tesseract/lstm/stridemap.h" -using tesseract::FlexDimensions; using tesseract::FD_BATCH; using tesseract::FD_HEIGHT; using tesseract::FD_WIDTH; +using tesseract::FlexDimensions; using tesseract::NetworkIO; using tesseract::StrideMap; @@ -93,9 +93,9 @@ TEST_F(NetworkioTest, CopyWithYReversal) { StrideMap::Index index(copy.stride_map()); int next_t = 0; int pos = 0; - std::vector expected_values = {8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, - 3, 27, 28, 29, 30, 31, 22, 23, 24, 25, 26, - 17, 18, 19, 20, 21, 12, 13, 14, 15, 16}; + std::vector expected_values = { + 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 27, 28, 29, 30, + 31, 22, 23, 24, 25, 26, 17, 18, 19, 20, 21, 12, 13, 14, 15, 16}; do { int t = index.t(); // The indexed values match the expected values. @@ -125,9 +125,9 @@ TEST_F(NetworkioTest, CopyWithXReversal) { StrideMap::Index index(copy.stride_map()); int next_t = 0; int pos = 0; - std::vector expected_values = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, - 8, 16, 15, 14, 13, 12, 21, 20, 19, 18, 17, - 26, 25, 24, 23, 22, 31, 30, 29, 28, 27}; + std::vector expected_values = { + 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 16, 15, 14, 13, + 12, 21, 20, 19, 18, 17, 26, 25, 24, 23, 22, 31, 30, 29, 28, 27}; do { int t = index.t(); // The indexed values match the expected values. @@ -157,9 +157,9 @@ TEST_F(NetworkioTest, CopyWithXYTranspose) { StrideMap::Index index(copy.stride_map()); int next_t = 0; int pos = 0; - std::vector expected_values = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, - 11, 12, 17, 22, 27, 13, 18, 23, 28, 14, 19, - 24, 29, 15, 20, 25, 30, 16, 21, 26, 31}; + std::vector expected_values = { + 0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11, 12, 17, 22, 27, + 13, 18, 23, 28, 14, 19, 24, 29, 15, 20, 25, 30, 16, 21, 26, 31}; do { int t = index.t(); // The indexed values match the expected values. diff --git a/unittest/normstrngs_test.cc b/unittest/normstrngs_test.cc index 73cbe2af81..44e6cc7eb6 100644 --- a/unittest/normstrngs_test.cc +++ b/unittest/normstrngs_test.cc @@ -23,7 +23,7 @@ TEST(NormstrngsTest, BasicText) { } TEST(NormstrngsTest, LigatureText) { - const char* kTwoByteLigText = "ij"; // U+0133 (ij) -> ij + const char* kTwoByteLigText = "ij"; // U+0133 (ij) -> ij string result; EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize, GraphemeNorm::kNormalize, kTwoByteLigText, @@ -51,7 +51,7 @@ TEST(NormstrngsTest, OcrSpecificNormalization) { &result)); EXPECT_STREQ("\"Hi", result.c_str()); - const char* kEmDash = "Hi—"; // U+2014 (—) -> U+02D (-) + const char* kEmDash = "Hi—"; // U+2014 (—) -> U+02D (-) EXPECT_TRUE(NormalizeUTF8String(UnicodeNormMode::kNFKC, OCRNorm::kNormalize, GraphemeNorm::kNormalize, kEmDash, &result)); EXPECT_STREQ("Hi-", result.c_str()); diff --git a/unittest/nthitem_test.cc b/unittest/nthitem_test.cc index d7b882d238..0280b5ae85 100644 --- a/unittest/nthitem_test.cc +++ b/unittest/nthitem_test.cc @@ -16,7 +16,7 @@ namespace tesseract { -int test_data[] = { 8, 1, 2, -4, 7, 9, 65536, 4, 9, 0, -32767, 6, 7}; +int test_data[] = {8, 1, 2, -4, 7, 9, 65536, 4, 9, 0, -32767, 6, 7}; // The fixture for testing GenericHeap and DoublePtr. class NthItemTest : public testing::Test { @@ -64,7 +64,7 @@ TEST_F(NthItemTest, GeneralTest) { TEST_F(NthItemTest, BoringTest) { KDVector v; // Push the test data onto the KDVector. - int test_data[] = { 8, 8, 8, 8, 8, 7, 7, 7, 7}; + int test_data[] = {8, 8, 8, 8, 8, 7, 7, 7, 7}; for (int i = 0; i < ARRAYSIZE(test_data); ++i) { IntKDPair pair(test_data[i], i); v.push_back(pair); diff --git a/unittest/osd_test.cc b/unittest/osd_test.cc index 2bd408e3fb..cf49cb2e15 100644 --- a/unittest/osd_test.cc +++ b/unittest/osd_test.cc @@ -14,104 +14,112 @@ // limitations under the License. /////////////////////////////////////////////////////////////////////// -//based on https://gist.github.com/amitdo/7c7a522004dd79b398340c9595b377e1 +// based on https://gist.github.com/amitdo/7c7a522004dd79b398340c9595b377e1 // expects clones of tessdata, tessdata_fast and tessdata_best repos //#include "log.h" -#include "include_gunit.h" -#include "baseapi.h" -#include "leptonica/allheaders.h" #include #include +#include "baseapi.h" +#include "include_gunit.h" +#include "leptonica/allheaders.h" namespace { class TestClass : public testing::Test { protected: - }; - - void OSDTester( int expected_deg, const char* imgname, const char* tessdatadir) { - //log.info() << tessdatadir << " for image: " << imgname << std::endl; - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); - ASSERT_FALSE(api->Init(tessdatadir, "osd")) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); - ASSERT_TRUE(image != nullptr) << "Failed to read test image."; - api->SetImage(image); - int orient_deg; - float orient_conf; - const char* script_name; - float script_conf; - bool detected = api->DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf); - ASSERT_FALSE(!detected) << "Failed to detect OSD."; - printf("************ Orientation in degrees: %d, Orientation confidence: %.2f\n" - " Script: %s, Script confidence: %.2f\n", - orient_deg, orient_conf, - script_name, script_conf); - EXPECT_EQ(expected_deg, orient_deg); - api->End(); - pixDestroy(&image); - } - - class OSDTest : public TestClass , - public ::testing::WithParamInterface> {}; - - TEST_P(OSDTest, MatchOrientationDegrees) { - OSDTester(std::get<0>(GetParam()), std::get<1>(GetParam()), std::get<2>(GetParam())); - } - - INSTANTIATE_TEST_CASE_P( TessdataEngEuroHebrew, OSDTest, - ::testing::Combine( - ::testing::Values(0), - ::testing::Values(TESTING_DIR "/phototest.tif", - TESTING_DIR "/eurotext.tif", - TESTING_DIR "/hebrew.png"), - ::testing::Values(TESSDATA_DIR))); - - INSTANTIATE_TEST_CASE_P( TessdataBestEngEuroHebrew, OSDTest, - ::testing::Combine( - ::testing::Values(0), - ::testing::Values(TESTING_DIR "/phototest.tif", - TESTING_DIR "/eurotext.tif", - TESTING_DIR "/hebrew.png"), - ::testing::Values(TESSDATA_DIR "_best"))); - - INSTANTIATE_TEST_CASE_P( TessdataFastEngEuroHebrew, OSDTest, - ::testing::Combine( - ::testing::Values(0), - ::testing::Values(TESTING_DIR "/phototest.tif", - TESTING_DIR "/eurotext.tif", - TESTING_DIR "/hebrew.png"), - ::testing::Values(TESSDATA_DIR "_fast"))); - - INSTANTIATE_TEST_CASE_P( TessdataFastRotated90, OSDTest, - ::testing::Combine( - ::testing::Values(90), - ::testing::Values(TESTING_DIR "/phototest-rotated-R.png"), - ::testing::Values(TESSDATA_DIR "_fast"))); - - INSTANTIATE_TEST_CASE_P( TessdataFastRotated180, OSDTest, - ::testing::Combine( - ::testing::Values(180), - ::testing::Values(TESTING_DIR "/phototest-rotated-180.png"), - ::testing::Values(TESSDATA_DIR "_fast"))); - - INSTANTIATE_TEST_CASE_P( TessdataFastRotated270, OSDTest, - ::testing::Combine( - ::testing::Values(270), - ::testing::Values(TESTING_DIR "/phototest-rotated-L.png"), - ::testing::Values(TESSDATA_DIR "_fast"))); - - INSTANTIATE_TEST_CASE_P( TessdataFastDevaRotated270, OSDTest, - ::testing::Combine( - ::testing::Values(270), - ::testing::Values(TESTING_DIR "/devatest-rotated-270.png"), - ::testing::Values(TESSDATA_DIR "_fast"))); - - INSTANTIATE_TEST_CASE_P( TessdataFastDeva, OSDTest, - ::testing::Combine( - ::testing::Values(0), - ::testing::Values(TESTING_DIR "/devatest.png"), - ::testing::Values(TESSDATA_DIR "_fast"))); +}; + +void OSDTester(int expected_deg, const char* imgname, const char* tessdatadir) { + // log.info() << tessdatadir << " for image: " << imgname << std::endl; + tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); + ASSERT_FALSE(api->Init(tessdatadir, "osd")) + << "Could not initialize tesseract."; + Pix* image = pixRead(imgname); + ASSERT_TRUE(image != nullptr) << "Failed to read test image."; + api->SetImage(image); + int orient_deg; + float orient_conf; + const char* script_name; + float script_conf; + bool detected = api->DetectOrientationScript(&orient_deg, &orient_conf, + &script_name, &script_conf); + ASSERT_FALSE(!detected) << "Failed to detect OSD."; + printf( + "************ Orientation in degrees: %d, Orientation confidence: %.2f\n" + " Script: %s, Script confidence: %.2f\n", + orient_deg, orient_conf, script_name, script_conf); + EXPECT_EQ(expected_deg, orient_deg); + api->End(); + pixDestroy(&image); +} + +class OSDTest : public TestClass, + public ::testing::WithParamInterface< + std::tuple> {}; + +TEST_P(OSDTest, MatchOrientationDegrees) { + OSDTester(std::get<0>(GetParam()), std::get<1>(GetParam()), + std::get<2>(GetParam())); +} + +INSTANTIATE_TEST_CASE_P( + TessdataEngEuroHebrew, OSDTest, + ::testing::Combine(::testing::Values(0), + ::testing::Values(TESTING_DIR "/phototest.tif", + TESTING_DIR "/eurotext.tif", + TESTING_DIR "/hebrew.png"), + ::testing::Values(TESSDATA_DIR))); + +INSTANTIATE_TEST_CASE_P( + TessdataBestEngEuroHebrew, OSDTest, + ::testing::Combine(::testing::Values(0), + ::testing::Values(TESTING_DIR "/phototest.tif", + TESTING_DIR "/eurotext.tif", + TESTING_DIR "/hebrew.png"), + ::testing::Values(TESSDATA_DIR "_best"))); + +INSTANTIATE_TEST_CASE_P( + TessdataFastEngEuroHebrew, OSDTest, + ::testing::Combine(::testing::Values(0), + ::testing::Values(TESTING_DIR "/phototest.tif", + TESTING_DIR "/eurotext.tif", + TESTING_DIR "/hebrew.png"), + ::testing::Values(TESSDATA_DIR "_fast"))); + +INSTANTIATE_TEST_CASE_P( + TessdataFastRotated90, OSDTest, + ::testing::Combine(::testing::Values(90), + ::testing::Values(TESTING_DIR + "/phototest-rotated-R.png"), + ::testing::Values(TESSDATA_DIR "_fast"))); + +INSTANTIATE_TEST_CASE_P( + TessdataFastRotated180, OSDTest, + ::testing::Combine(::testing::Values(180), + ::testing::Values(TESTING_DIR + "/phototest-rotated-180.png"), + ::testing::Values(TESSDATA_DIR "_fast"))); + +INSTANTIATE_TEST_CASE_P( + TessdataFastRotated270, OSDTest, + ::testing::Combine(::testing::Values(270), + ::testing::Values(TESTING_DIR + "/phototest-rotated-L.png"), + ::testing::Values(TESSDATA_DIR "_fast"))); + +INSTANTIATE_TEST_CASE_P( + TessdataFastDevaRotated270, OSDTest, + ::testing::Combine(::testing::Values(270), + ::testing::Values(TESTING_DIR + "/devatest-rotated-270.png"), + ::testing::Values(TESSDATA_DIR "_fast"))); + +INSTANTIATE_TEST_CASE_P( + TessdataFastDeva, OSDTest, + ::testing::Combine(::testing::Values(0), + ::testing::Values(TESTING_DIR "/devatest.png"), + ::testing::Values(TESSDATA_DIR "_fast"))); } // namespace diff --git a/unittest/pagesegmode_test.cc b/unittest/pagesegmode_test.cc index 7e9895db85..97fe4f95c9 100644 --- a/unittest/pagesegmode_test.cc +++ b/unittest/pagesegmode_test.cc @@ -10,20 +10,14 @@ namespace { class PageSegModeTest : public testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessdataPath() { - return file::JoinPath(FLAGS_test_srcdir, - "tessdata"); + return file::JoinPath(FLAGS_test_srcdir, "tessdata"); } - PageSegModeTest() { - src_pix_ = NULL; - } - ~PageSegModeTest() { - pixDestroy(&src_pix_); - } + PageSegModeTest() { src_pix_ = NULL; } + ~PageSegModeTest() { pixDestroy(&src_pix_); } void SetImage(const char* filename) { pixDestroy(&src_pix_); @@ -34,26 +28,26 @@ class PageSegModeTest : public testing::Test { // Tests that the given rectangle produces exactly the given text in the // given segmentation mode (after chopping off the last 2 newlines.) - void VerifyRectText(tesseract::PageSegMode mode, const char* str, - int left, int top, int width, int height) { + void VerifyRectText(tesseract::PageSegMode mode, const char* str, int left, + int top, int width, int height) { api_.SetPageSegMode(mode); api_.SetRectangle(left, top, width, height); char* result = api_.GetUTF8Text(); chomp_string(result); chomp_string(result); EXPECT_STREQ(str, result); - delete [] result; + delete[] result; } // Tests that the given rectangle does NOT produce the given text in the // given segmentation mode. - void NotRectText(tesseract::PageSegMode mode, const char* str, - int left, int top, int width, int height) { + void NotRectText(tesseract::PageSegMode mode, const char* str, int left, + int top, int width, int height) { api_.SetPageSegMode(mode); api_.SetRectangle(left, top, width, height); char* result = api_.GetUTF8Text(); EXPECT_STRNE(str, result); - delete [] result; + delete[] result; } Pix* src_pix_; @@ -66,26 +60,21 @@ class PageSegModeTest : public testing::Test { TEST_F(PageSegModeTest, WordTest) { SetImage("segmodeimg.tif"); // Test various rectangles around the inverse page number. - VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", - 1482, 146, 72, 44); - VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", - 1474, 134, 82, 72); - VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", - 1459, 116, 118, 112); + VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1482, 146, 72, 44); + VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1474, 134, 82, 72); + VerifyRectText(tesseract::PSM_SINGLE_WORD, "183", 1459, 116, 118, 112); // Test a random pair of words as a line - VerifyRectText(tesseract::PSM_SINGLE_LINE, "What should", - 1119, 621, 245, 54); + VerifyRectText(tesseract::PSM_SINGLE_LINE, "What should", 1119, 621, 245, 54); // Test a random pair of words as a word - VerifyRectText(tesseract::PSM_SINGLE_WORD, "Whatshould", - 1119, 621, 245, 54); + VerifyRectText(tesseract::PSM_SINGLE_WORD, "Whatshould", 1119, 621, 245, 54); // Test single block mode. - VerifyRectText(tesseract::PSM_SINGLE_BLOCK, "both the\nfrom the", - 181, 676, 179, 104); + VerifyRectText(tesseract::PSM_SINGLE_BLOCK, "both the\nfrom the", 181, 676, + 179, 104); // But doesn't work in line or word mode. - NotRectText(tesseract::PSM_SINGLE_LINE, "both the\nfrom the", - 181, 676, 179, 104); - NotRectText(tesseract::PSM_SINGLE_WORD, "both the\nfrom the", - 181, 676, 179, 104); + NotRectText(tesseract::PSM_SINGLE_LINE, "both the\nfrom the", 181, 676, 179, + 104); + NotRectText(tesseract::PSM_SINGLE_WORD, "both the\nfrom the", 181, 676, 179, + 104); } } // namespace diff --git a/unittest/pango_font_info_test.cc b/unittest/pango_font_info_test.cc index 190b2bda01..093ddf94ad 100644 --- a/unittest/pango_font_info_test.cc +++ b/unittest/pango_font_info_test.cc @@ -14,19 +14,24 @@ DECLARE_BOOL_PARAM_FLAG(use_only_legacy_fonts); namespace { -using tesseract::FontUtils; using tesseract::File; +using tesseract::FontUtils; using tesseract::PangoFontInfo; // Fonts in testdata directory -const char* kExpectedFontNames[] = { - "Arab", "Arial Bold Italic", "DejaVu Sans Ultra-Light", "Lohit Hindi", +const char* kExpectedFontNames[] = {"Arab", + "Arial Bold Italic", + "DejaVu Sans Ultra-Light", + "Lohit Hindi", #if PANGO_VERSION <= 12005 - "Times New Roman", + "Times New Roman", #else - "Times New Roman,", // Pango v1.36.2 requires a trailing ',' + "Times New Roman,", // Pango v1.36.2 + // requires a trailing + // ',' #endif - "UnBatang", "Verdana"}; + "UnBatang", + "Verdana"}; // Sample text used in tests. const char kArabicText[] = "والفكر والصراع 1234,\nوالفكر والصراع"; @@ -36,18 +41,17 @@ const char kKorText[] = "이는 것으로"; // Hindi words containing illegal vowel sequences. const char* kBadlyFormedHinWords[] = { #if PANGO_VERSION <= 12005 - "उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस", + "उपयोक्ताो", "नहीें", "कहीअे", "पत्रिाका", "छह्णाीस", #endif - // Pango v1.36.2 will render the above words even though they are invalid. - "प्रंात", NULL }; + // Pango v1.36.2 will render the above words even though they are invalid. + "प्रंात", NULL}; class PangoFontInfoTest : public ::testing::Test { protected: // Creates a fake fonts.conf file that points to the testdata fonts for // fontconfig to initialize with. static void SetUpTestCase() { - FLAGS_fonts_dir = File::JoinPath( - FLAGS_test_srcdir, "testdata"); + FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata"); FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir; FLAGS_use_only_legacy_fonts = false; } @@ -111,12 +115,11 @@ TEST_F(PangoFontInfoTest, CanRenderString) { TEST_F(PangoFontInfoTest, CanRenderLigature) { font_info_.ParseFontDescriptionName("Arab 12"); const char kArabicLigature[] = "لا"; - EXPECT_TRUE(font_info_.CanRenderString(kArabicLigature, - strlen(kArabicLigature))); + EXPECT_TRUE( + font_info_.CanRenderString(kArabicLigature, strlen(kArabicLigature))); printf("Next word\n"); - EXPECT_TRUE(font_info_.CanRenderString(kArabicText, - strlen(kArabicText))); + EXPECT_TRUE(font_info_.CanRenderString(kArabicText, strlen(kArabicText))); } TEST_F(PangoFontInfoTest, CannotRenderUncoveredString) { @@ -142,9 +145,9 @@ TEST_F(PangoFontInfoTest, CanDropUncoveredChars) { // Dont drop non-letter characters like word joiners. const char* kJoiners[] = { - "\u2060", // U+2060 (WJ) - "\u200C", // U+200C (ZWJ) - "\u200D" // U+200D (ZWNJ) + "\u2060", // U+2060 (WJ) + "\u200C", // U+200C (ZWJ) + "\u200D" // U+200D (ZWNJ) }; for (int i = 0; i < ARRAYSIZE(kJoiners); ++i) { word = kJoiners[i]; @@ -153,7 +156,6 @@ TEST_F(PangoFontInfoTest, CanDropUncoveredChars) { } } - // ------------------------ FontUtils ------------------------------------ class FontUtilsTest : public ::testing::Test { @@ -161,8 +163,7 @@ class FontUtilsTest : public ::testing::Test { // Creates a fake fonts.conf file that points to the testdata fonts for // fontconfig to initialize with. static void SetUpTestCase() { - FLAGS_fonts_dir = File::JoinPath( - FLAGS_test_srcdir, "testdata"); + FLAGS_fonts_dir = File::JoinPath(FLAGS_test_srcdir, "testdata"); FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir; } @@ -229,8 +230,8 @@ TEST_F(FontUtilsTest, DoesFindBestFonts) { } TEST_F(FontUtilsTest, DoesSelectFont) { - const char* kLangText[] = { kArabicText, kEngText, kHinText, kKorText, NULL }; - const char* kLangNames[] = { "Arabic", "English", "Hindi", "Korean", NULL }; + const char* kLangText[] = {kArabicText, kEngText, kHinText, kKorText, NULL}; + const char* kLangNames[] = {"Arabic", "English", "Hindi", "Korean", NULL}; for (int i = 0; kLangText[i] != NULL; ++i) { SCOPED_TRACE(kLangNames[i]); std::vector graphemes; @@ -246,8 +247,7 @@ TEST_F(FontUtilsTest, DoesFailToSelectFont) { const char kMixedScriptText[] = "पिताने विवाह की | والفكر والصراع"; std::vector graphemes; string selected_font; - EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, - strlen(kMixedScriptText), + EXPECT_FALSE(FontUtils::SelectFont(kMixedScriptText, strlen(kMixedScriptText), &selected_font, &graphemes)); } @@ -271,17 +271,16 @@ TEST_F(FontUtilsTest, GetAllRenderableCharacters) { FontUtils::GetAllRenderableCharacters(selected_fonts, &unicode_mask); EXPECT_TRUE(unicode_mask['1']); EXPECT_TRUE(unicode_mask[kHindiChar]); - EXPECT_FALSE(unicode_mask['A']); // Lohit doesn't render English, - EXPECT_FALSE(unicode_mask[kArabicChar]); // or Arabic, - EXPECT_FALSE(unicode_mask[kMongolianChar]); // or Mongolian, - EXPECT_FALSE(unicode_mask[kOghamChar]); // or Ogham. + EXPECT_FALSE(unicode_mask['A']); // Lohit doesn't render English, + EXPECT_FALSE(unicode_mask[kArabicChar]); // or Arabic, + EXPECT_FALSE(unicode_mask[kMongolianChar]); // or Mongolian, + EXPECT_FALSE(unicode_mask[kOghamChar]); // or Ogham. // Check that none of the included fonts cover the Mongolian or Ogham space // characters. for (int f = 0; f < ARRAYSIZE(kExpectedFontNames); ++f) { SCOPED_TRACE(absl::StrCat("Testing ", kExpectedFontNames[f])); - FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], - &unicode_mask); + FontUtils::GetAllRenderableCharacters(kExpectedFontNames[f], &unicode_mask); EXPECT_FALSE(unicode_mask[kOghamChar]); EXPECT_FALSE(unicode_mask[kMongolianChar]); } diff --git a/unittest/paragraphs_test.cc b/unittest/paragraphs_test.cc index f32d9f97de..fc8e080374 100644 --- a/unittest/paragraphs_test.cc +++ b/unittest/paragraphs_test.cc @@ -9,12 +9,11 @@ namespace { // anonymous namespace // Functions for making monospace ASCII trial text for the paragraph detector. const tesseract::ParagraphJustification kLeft = tesseract::JUSTIFICATION_LEFT; -const tesseract::ParagraphJustification kCenter - = tesseract::JUSTIFICATION_CENTER; -const tesseract::ParagraphJustification kRight - = tesseract::JUSTIFICATION_RIGHT; -const tesseract::ParagraphJustification kUnknown - = tesseract::JUSTIFICATION_UNKNOWN; +const tesseract::ParagraphJustification kCenter = + tesseract::JUSTIFICATION_CENTER; +const tesseract::ParagraphJustification kRight = tesseract::JUSTIFICATION_RIGHT; +const tesseract::ParagraphJustification kUnknown = + tesseract::JUSTIFICATION_UNKNOWN; enum TextModelInputType { PCONT = 0, // Continuation line of a paragraph (default). @@ -23,7 +22,7 @@ enum TextModelInputType { }; struct TextAndModel { - const char *ascii; + const char* ascii; TextModelInputType model_type; // fields corresponding to PARA (see ccstruct/ocrpara.h) @@ -34,13 +33,13 @@ struct TextAndModel { // Imagine that the given text is typewriter ASCII with each character ten // pixels wide and twenty pixels high and return an appropriate row_info. -void AsciiToRowInfo(const char *text, int row_number, - tesseract::RowInfo *info) { +void AsciiToRowInfo(const char* text, int row_number, + tesseract::RowInfo* info) { const int kCharWidth = 10; const int kLineSpace = 30; info->text = text; - info->has_leaders = strstr(text, "...") != NULL || - strstr(text, ". . .") != NULL; + info->has_leaders = + strstr(text, "...") != NULL || strstr(text, ". . .") != NULL; info->has_drop_cap = false; info->pix_ldistance = info->pix_rdistance = 0; info->average_interword_space = kCharWidth; @@ -50,20 +49,21 @@ void AsciiToRowInfo(const char *text, int row_number, std::vector words = absl::StrSplit(text, ' ', absl::SkipEmpty()); info->num_words = words.size(); - if (info->num_words < 1) - return; + if (info->num_words < 1) return; info->lword_text = words[0].c_str(); info->rword_text = words[words.size() - 1].c_str(); int lspace = 0; - while (lspace < info->text.size() && text[lspace] == ' ') { lspace++; } + while (lspace < info->text.size() && text[lspace] == ' ') { + lspace++; + } int rspace = 0; while (rspace < info->text.size() && text[info->text.size() - rspace - 1] == ' ') { rspace++; } - int top = - kLineSpace * row_number; + int top = -kLineSpace * row_number; int bottom = top - kLineSpace; int row_right = kCharWidth * info->text.size(); int lword_width = kCharWidth * info->lword_text.size(); @@ -71,25 +71,19 @@ void AsciiToRowInfo(const char *text, int row_number, info->pix_ldistance = lspace * kCharWidth; info->pix_rdistance = rspace * kCharWidth; info->lword_box = - TBOX(info->pix_ldistance, bottom, - info->pix_ldistance + lword_width, top); - info->rword_box = - TBOX(row_right - info->pix_rdistance - rword_width, bottom, - row_right - info->pix_rdistance, top); + TBOX(info->pix_ldistance, bottom, info->pix_ldistance + lword_width, top); + info->rword_box = TBOX(row_right - info->pix_rdistance - rword_width, bottom, + row_right - info->pix_rdistance, top); tesseract::LeftWordAttributes( - NULL, NULL, info->lword_text, - &info->lword_indicates_list_item, - &info->lword_likely_starts_idea, - &info->lword_likely_ends_idea); + NULL, NULL, info->lword_text, &info->lword_indicates_list_item, + &info->lword_likely_starts_idea, &info->lword_likely_ends_idea); tesseract::RightWordAttributes( - NULL, NULL, info->rword_text, - &info->rword_indicates_list_item, - &info->rword_likely_starts_idea, - &info->rword_likely_ends_idea); + NULL, NULL, info->rword_text, &info->rword_indicates_list_item, + &info->rword_likely_starts_idea, &info->rword_likely_ends_idea); } -void MakeAsciiRowInfos(const TextAndModel *row_infos, int n, - GenericVector *output) { +void MakeAsciiRowInfos(const TextAndModel* row_infos, int n, + GenericVector* output) { output->clear(); tesseract::RowInfo info; for (int i = 0; i < n; i++) { @@ -100,8 +94,8 @@ void MakeAsciiRowInfos(const TextAndModel *row_infos, int n, // Given n rows of reference ground truth, evaluate whether the n rows // of PARA * pointers yield the same paragraph breakpoints. -void EvaluateParagraphDetection(const TextAndModel *correct, int n, - const GenericVector &detector_output) { +void EvaluateParagraphDetection(const TextAndModel* correct, int n, + const GenericVector& detector_output) { int incorrect_breaks = 0; int missed_breaks = 0; int poorly_matched_models = 0; @@ -111,10 +105,8 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n, for (int i = 1; i < n; i++) { bool has_break = correct[i].model_type != PCONT; bool detected_break = (detector_output[i - 1] != detector_output[i]); - if (has_break && !detected_break) - missed_breaks++; - if (detected_break && !has_break) - incorrect_breaks++; + if (has_break && !detected_break) missed_breaks++; + if (detected_break && !has_break) incorrect_breaks++; if (has_break) { if (correct[i].model_type == PNONE) { if (detector_output[i]->model != NULL) { @@ -131,8 +123,7 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n, detector_output[i]->is_very_first_or_continuation) { bad_crowns++; } - if (correct[i].is_list_item ^ - detector_output[i]->is_list_item) { + if (correct[i].is_list_item ^ detector_output[i]->is_list_item) { bad_list_items++; } } @@ -180,16 +171,16 @@ void EvaluateParagraphDetection(const TextAndModel *correct, int n, } } -void TestParagraphDetection(const TextAndModel *correct, int num_rows) { +void TestParagraphDetection(const TextAndModel* correct, int num_rows) { GenericVector row_infos; - GenericVector row_owners; + GenericVector row_owners; PARA_LIST paragraphs; - GenericVector models; + GenericVector models; MakeAsciiRowInfos(correct, num_rows, &row_infos); int debug_level(3); - tesseract::DetectParagraphs(debug_level, &row_infos, &row_owners, - ¶graphs, &models); + tesseract::DetectParagraphs(debug_level, &row_infos, &row_owners, ¶graphs, + &models); EvaluateParagraphDetection(correct, num_rows, row_owners); models.delete_data_pointers(); } @@ -220,15 +211,15 @@ TEST(ParagraphsTest, ListItemsIdentified) { typedef ParagraphModel PModel; const TextAndModel kTwoSimpleParagraphs[] = { - {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"This paragraph starts at the top"}, - {"of the page and takes 3 lines. "}, - {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is not a continuation "}, - {"from a previous page, as it is "}, - {"indented just like this second "}, - {"paragraph. "}, + {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"This paragraph starts at the top"}, + {"of the page and takes 3 lines. "}, + {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"which indicates that the first "}, + {"paragraph is not a continuation "}, + {"from a previous page, as it is "}, + {"indented just like this second "}, + {"paragraph. "}, }; TEST(ParagraphsTest, TestSimpleParagraphDetection) { @@ -237,33 +228,34 @@ TEST(ParagraphsTest, TestSimpleParagraphDetection) { } const TextAndModel kFewCluesWithCrown[] = { - {"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), true}, - {"of the page and takes two lines."}, - {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is a continuation from"}, - {"a previous page, as it is "}, - {"indented just like this second "}, - {"paragraph. "}, + {"This paragraph starts at the top", PSTART, PModel(kLeft, 0, 20, 0, 0), + true}, + {"of the page and takes two lines."}, + {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"which indicates that the first "}, + {"paragraph is a continuation from"}, + {"a previous page, as it is "}, + {"indented just like this second "}, + {"paragraph. "}, }; - TEST(ParagraphsTest, TestFewCluesWithCrown) { TestParagraphDetection(kFewCluesWithCrown, ABSL_ARRAYSIZE(kFewCluesWithCrown)); } const TextAndModel kCrownedParagraph[] = { - {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true}, - {"often not indented as the rest "}, - {"of the paragraphs are. Nonethe-"}, - {"less it should be counted as the"}, - {"same type of paragraph. "}, - {" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"graphs are both indented two "}, - {"spaces. "}, - {" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"fmt refers to as a 'crown.' "}, + {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), + true}, + {"often not indented as the rest "}, + {"of the paragraphs are. Nonethe-"}, + {"less it should be counted as the"}, + {"same type of paragraph. "}, + {" The second and third para- ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"graphs are both indented two "}, + {"spaces. "}, + {" The first paragraph has what ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"fmt refers to as a 'crown.' "}, }; TEST(ParagraphsTest, TestCrownParagraphDetection) { @@ -271,18 +263,18 @@ TEST(ParagraphsTest, TestCrownParagraphDetection) { } const TextAndModel kFlushLeftParagraphs[] = { - {"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0)}, - {"flush left paragraphs (those"}, - {"with no body indent) are not"}, - {"actually crowns. "}, - {"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0)}, - {"also flush left aligned. Usual-"}, - {"ly, these paragraphs are set"}, - {"apart vertically by some white-"}, - {"space, but you can also detect"}, - {"them by observing the big empty"}, - {"space at the ends of the para-"}, - {"graphs. "}, + {"It is sometimes the case that", PSTART, PModel(kLeft, 0, 0, 0, 0)}, + {"flush left paragraphs (those"}, + {"with no body indent) are not"}, + {"actually crowns. "}, + {"Instead, further paragraphs are", PSTART, PModel(kLeft, 0, 0, 0, 0)}, + {"also flush left aligned. Usual-"}, + {"ly, these paragraphs are set"}, + {"apart vertically by some white-"}, + {"space, but you can also detect"}, + {"them by observing the big empty"}, + {"space at the ends of the para-"}, + {"graphs. "}, }; TEST(ParagraphsText, TestRealFlushLeftParagraphs) { @@ -291,46 +283,45 @@ TEST(ParagraphsText, TestRealFlushLeftParagraphs) { }; const TextAndModel kSingleFullPageContinuation[] = { - {"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true}, - {"continuation. It flows from"}, - {"line to line, using the full"}, - {"column width with no clear"}, - {"paragraph break, because it"}, - {"actually doesn't have one. It"}, - {"is the middle of one monster"}, - {"paragraph continued from the"}, - {"previous page and continuing"}, - {"onto the next page. There-"}, - {"fore, it ends up getting"}, - {"marked as a crown and then"}, - {"getting re-marked as any ex-"}, - {"isting model. Not great, but"}, + {"sometimes a page is one giant", PSTART, PModel(kLeft, 0, 20, 0, 0), true}, + {"continuation. It flows from"}, + {"line to line, using the full"}, + {"column width with no clear"}, + {"paragraph break, because it"}, + {"actually doesn't have one. It"}, + {"is the middle of one monster"}, + {"paragraph continued from the"}, + {"previous page and continuing"}, + {"onto the next page. There-"}, + {"fore, it ends up getting"}, + {"marked as a crown and then"}, + {"getting re-marked as any ex-"}, + {"isting model. Not great, but"}, }; TEST(ParagraphsTest, TestSingleFullPageContinuation) { - const TextAndModel *correct = kSingleFullPageContinuation; + const TextAndModel* correct = kSingleFullPageContinuation; int num_rows = ABSL_ARRAYSIZE(kSingleFullPageContinuation); GenericVector row_infos; - GenericVector row_owners; + GenericVector row_owners; PARA_LIST paragraphs; - GenericVector models; + GenericVector models; models.push_back(new ParagraphModel(kLeft, 0, 20, 0, 10)); MakeAsciiRowInfos(correct, num_rows, &row_infos); - tesseract::DetectParagraphs(3, &row_infos, &row_owners, ¶graphs, - &models); + tesseract::DetectParagraphs(3, &row_infos, &row_owners, ¶graphs, &models); EvaluateParagraphDetection(correct, num_rows, row_owners); models.delete_data_pointers(); } const TextAndModel kRightAligned[] = { - {"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0)}, - {" uncommon in Left-to-Right"}, - {" languages, but they do"}, - {" exist."}, - {" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0)}, - {" horribly tiny paragraphs in"}, - {" tables on which we have no"}, - {" chance anyways."}, + {"Right-aligned paragraphs are", PSTART, PModel(kRight, 0, 0, 0, 0)}, + {" uncommon in Left-to-Right"}, + {" languages, but they do"}, + {" exist."}, + {" Mostly, however, they're", PSTART, PModel(kRight, 0, 0, 0, 0)}, + {" horribly tiny paragraphs in"}, + {" tables on which we have no"}, + {" chance anyways."}, }; TEST(ParagraphsTest, TestRightAlignedParagraph) { @@ -338,66 +329,71 @@ TEST(ParagraphsTest, TestRightAlignedParagraph) { } const TextAndModel kTinyParagraphs[] = { - {" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"obvious paragraph text, you might"}, - {"find short exchanges of dialogue "}, - {"between characters. "}, - {" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"mark a new paragraph whenever one"}, - {"of the statistics (left, right or"}, - {"center) changes from one text-"}, - {"line to the next. Such an"}, - {"approach would misclassify the"}, - {"tiny paragraphs above as a single"}, - {"paragraph. "}, + {" Occasionally, interspersed with", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"obvious paragraph text, you might"}, + {"find short exchanges of dialogue "}, + {"between characters. "}, + {" 'Oh?' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {" 'Don't be confused!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {" 'Not me!' ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {" One naive approach would be to ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"mark a new paragraph whenever one"}, + {"of the statistics (left, right or"}, + {"center) changes from one text-"}, + {"line to the next. Such an"}, + {"approach would misclassify the"}, + {"tiny paragraphs above as a single"}, + {"paragraph. "}, }; TEST(ParagraphsTest, TestTinyParagraphs) { TestParagraphDetection(kTinyParagraphs, ABSL_ARRAYSIZE(kTinyParagraphs)); } - const TextAndModel kComplexPage1[] = { - {" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)}, - {" Centered Title "}, - {" Paragraph Detection "}, - {" OCR TEAM "}, - {" 10 November 2010 "}, - {" ", PNONE}, - {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"This paragraph starts at the top"}, - {"of the page and takes 3 lines. "}, - {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is not a continuation "}, - {"from a previous page, as it is "}, - {"indented just like this second "}, - {"paragraph. "}, - {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), true}, - {" looks like the prior text "}, - {" but it is indented more "}, - {" and is fully justified. "}, - {" So how does one deal with ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"centered text, block quotes, "}, - {"normal paragraphs, and lists "}, - {"like what follows? "}, - {"1. Make a plan. ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {"2. Use a heuristic, for example,", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" looking for lines where the "}, - {" first word of the next line "}, - {" would fit on the previous "}, - {" line. "}, - {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" Python and try it out. "}, - {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" mistakes. "}, - {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" For extra painful penalty work", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"you can try to identify source "}, - {"code. Ouch! "}, + {" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)}, + {" Centered Title "}, + {" Paragraph Detection "}, + {" OCR TEAM "}, + {" 10 November 2010 "}, + {" ", PNONE}, + {" Look here, I have a paragraph.", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"This paragraph starts at the top"}, + {"of the page and takes 3 lines. "}, + {" Here I have a second paragraph", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"which indicates that the first "}, + {"paragraph is not a continuation "}, + {"from a previous page, as it is "}, + {"indented just like this second "}, + {"paragraph. "}, + {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), + true}, + {" looks like the prior text "}, + {" but it is indented more "}, + {" and is fully justified. "}, + {" So how does one deal with ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"centered text, block quotes, "}, + {"normal paragraphs, and lists "}, + {"like what follows? "}, + {"1. Make a plan. ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {"2. Use a heuristic, for example,", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" looking for lines where the "}, + {" first word of the next line "}, + {" would fit on the previous "}, + {" line. "}, + {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" Python and try it out. "}, + {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" mistakes. "}, + {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" For extra painful penalty work", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"you can try to identify source "}, + {"code. Ouch! "}, }; TEST(ParagraphsTest, TestComplexPage1) { @@ -406,41 +402,47 @@ TEST(ParagraphsTest, TestComplexPage1) { // The same as above, but wider. const TextAndModel kComplexPage2[] = { - {" Awesome ", PSTART, PModel(kCenter, 0, 0, 0, 0)}, - {" Centered Title "}, - {" Paragraph Detection "}, - {" OCR TEAM "}, - {" 10 November 2010 "}, - {" ", PNONE}, - {" Look here, I have a paragraph. ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"This paragraph starts at the top of"}, - {"the page and takes 3 lines. "}, - {" Here I have a second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"which indicates that the first "}, - {"paragraph is not a continuation "}, - {"from a previous page, as it is in- "}, - {"dented just like this second para- "}, - {"graph. "}, - {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), true}, - {" looks like the prior text "}, - {" but it is indented more "}, - {" and is fully justified. "}, - {" So how does one deal with center-", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"ed text, block quotes, normal para-"}, - {"graphs, and lists like what follow?"}, - {"1. Make a plan. "}, // BUG!! - {"2. Use a heuristic, for example, ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" looking for lines where the "}, - {" first word of the next line "}, - {" would fit on the previous line. "}, - {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" Python and try it out. "}, - {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" mistakes. "}, - {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), false, true}, - {" For extra painful penalty work ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"you can try to identify source "}, - {"code. Ouch! "}, + {" Awesome ", PSTART, + PModel(kCenter, 0, 0, 0, 0)}, + {" Centered Title "}, + {" Paragraph Detection "}, + {" OCR TEAM "}, + {" 10 November 2010 "}, + {" ", PNONE}, + {" Look here, I have a paragraph. ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"This paragraph starts at the top of"}, + {"the page and takes 3 lines. "}, + {" Here I have a second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"which indicates that the first "}, + {"paragraph is not a continuation "}, + {"from a previous page, as it is in- "}, + {"dented just like this second para- "}, + {"graph. "}, + {" Here is a block quote. It ", PSTART, PModel(kLeft, 30, 0, 0, 0), + true}, + {" looks like the prior text "}, + {" but it is indented more "}, + {" and is fully justified. "}, + {" So how does one deal with center-", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"ed text, block quotes, normal para-"}, + {"graphs, and lists like what follow?"}, + {"1. Make a plan. "}, // BUG!! + {"2. Use a heuristic, for example, ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" looking for lines where the "}, + {" first word of the next line "}, + {" would fit on the previous line. "}, + {"8. Try to implement the plan in ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" Python and try it out. "}, + {"4. Determine how to fix the ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" mistakes. "}, + {"5. Repeat. ", PSTART, PModel(kLeft, 0, 0, 30, 0), + false, true}, + {" For extra painful penalty work ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"you can try to identify source "}, + {"code. Ouch! "}, }; TEST(ParagraphsTest, TestComplexPage2) { @@ -448,14 +450,15 @@ TEST(ParagraphsTest, TestComplexPage2) { } const TextAndModel kSubtleCrown[] = { - {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), true}, - {"often not indented as the rest "}, - {"of the paragraphs are. Nonethe-"}, - {"less it should be counted as the"}, - {"same type of paragraph. "}, - {" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"should suffice. "}, - {" 1235 ", PNONE}, + {"The first paragraph on a page is", PSTART, PModel(kLeft, 0, 20, 0, 0), + true}, + {"often not indented as the rest "}, + {"of the paragraphs are. Nonethe-"}, + {"less it should be counted as the"}, + {"same type of paragraph. "}, + {" Even a short second paragraph ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, + {"should suffice. "}, + {" 1235 ", PNONE}, }; TEST(ParagraphsTest, TestSubtleCrown) { @@ -467,39 +470,43 @@ TEST(ParagraphsTest, TestStrayLineInBlock) { } const TextAndModel kUnlvRep3AO[] = { - {" Defined contribution plans cover employees in Australia, New", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"Zealand, Spain, the United Kingdom and some U.S. subsidiaries. "}, - {"In addition, employees in the U.S. are eligible to participate in "}, - {"defined contribution plans (Employee Savings Plans) by contribut-"}, - {"ing a portion of their compensation. The Company matches com- "}, - {"pensation, depending on Company profit levels. Contributions "}, - {"charged to income for defined contribution plans were $92 in "}, - {"1993, $98 in 1992 and $89 in 1991. "}, - {" In addition to providing pension benefits, the Company pro- ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"vides certain health care and life insurance benefits to retired "}, - {"employees. As discussed in Note A, the Company adopted FASB "}, - {"Statement No. 106 effective January 1, 1992. Previously, the "}, - {"Company recognized the cost of providing these benefits as the "}, - {"benefits were paid. These pretax costs amounted to $53 in 1991. "}, - {"The Company continues to fund most of the cost of these medical "}, - {"and life insurance benefits in the year incurred. "}, - {" The U.S. plan covering the parent company is the largest plan.", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"It provides medical and life insurance benefits including hospital, "}, - {"physicians’ services and major medical expense benefits and life "}, - {"insurance benefits. The plan provides benefits supplemental to "}, - {"Medicare after retirees are eligible for these benefits. The cost of "}, - {"these benefits are shared by the Company and the retiree, with the "}, - {"Company portion increasing as the retiree has increased years of "}, - {"credited service. The Company has the ability to change these "}, - {"benefits at any time. "}, - {" Effective October 1993, the Company amended its health ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"benefits plan in the U.S. to cap the cost absorbed by the Company "}, - {"at approximately twice the 1993 cost per person for employees who"}, - {"retire after December 31, 1993. The effect of this amendment was "}, - {"to reduce the December 31, 1993 accumulated postretirement "}, - {"benefit obligation by $327. It also reduced the net periodic postre- "}, - {"tirement cost by $21 for 1993 and is estimated to reduce this cost "}, - {"for 1994 by approximately $83. "}, + {" Defined contribution plans cover employees in Australia, New", PSTART, + PModel(kLeft, 0, 50, 0, 0)}, + {"Zealand, Spain, the United Kingdom and some U.S. subsidiaries. "}, + {"In addition, employees in the U.S. are eligible to participate in "}, + {"defined contribution plans (Employee Savings Plans) by contribut-"}, + {"ing a portion of their compensation. The Company matches com- "}, + {"pensation, depending on Company profit levels. Contributions "}, + {"charged to income for defined contribution plans were $92 in "}, + {"1993, $98 in 1992 and $89 in 1991. "}, + {" In addition to providing pension benefits, the Company pro- ", PSTART, + PModel(kLeft, 0, 50, 0, 0)}, + {"vides certain health care and life insurance benefits to retired "}, + {"employees. As discussed in Note A, the Company adopted FASB "}, + {"Statement No. 106 effective January 1, 1992. Previously, the "}, + {"Company recognized the cost of providing these benefits as the "}, + {"benefits were paid. These pretax costs amounted to $53 in 1991. "}, + {"The Company continues to fund most of the cost of these medical "}, + {"and life insurance benefits in the year incurred. "}, + {" The U.S. plan covering the parent company is the largest plan.", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"It provides medical and life insurance benefits including hospital, "}, + {"physicians’ services and major medical expense benefits and life "}, + {"insurance benefits. The plan provides benefits supplemental to "}, + {"Medicare after retirees are eligible for these benefits. The cost of "}, + {"these benefits are shared by the Company and the retiree, with the "}, + {"Company portion increasing as the retiree has increased years of "}, + {"credited service. The Company has the ability to change these "}, + {"benefits at any time. "}, + {" Effective October 1993, the Company amended its health ", PSTART, + PModel(kLeft, 0, 50, 0, 0)}, + {"benefits plan in the U.S. to cap the cost absorbed by the Company "}, + {"at approximately twice the 1993 cost per person for employees who"}, + {"retire after December 31, 1993. The effect of this amendment was "}, + {"to reduce the December 31, 1993 accumulated postretirement "}, + {"benefit obligation by $327. It also reduced the net periodic postre- "}, + {"tirement cost by $21 for 1993 and is estimated to reduce this cost "}, + {"for 1994 by approximately $83. "}, }; TEST(ParagraphsTest, TestUnlvInsurance) { @@ -512,19 +519,19 @@ TEST(ParagraphsTest, TestUnlvInsurance) { // paragraph or two. // This example comes from Volume 9886293, Page 5 const TextAndModel kTableOfContents[] = { - {"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, - {" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {"1 Hmong People ........... 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Hmong Origins . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Language . . . . . . . 1", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Proverbs . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Discussion . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Riddles . . . . . . . 2", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Discussion . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Appearance . . . . . 3", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Hmong History . . . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Hmong in SE Asia . . . 4", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Hmong in the West . . .5", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Hmong in the USA . . . 5", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, + {" Discussion . . . . 6", PSTART, PModel(kUnknown, 0, 0, 0, 0)}, }; TEST(ParagraphsTest, TestSplitsOutLeaderLines) { @@ -532,31 +539,34 @@ TEST(ParagraphsTest, TestSplitsOutLeaderLines) { } const TextAndModel kTextWithSourceCode[] = { - {" A typical page of a programming book may contain", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"examples of source code to exemplify an algorithm "}, - {"being described in prose. Such examples should be"}, - {"rendered as lineated text, meaning text with "}, - {"explicit line breaks but without extra inter-line "}, - {"spacing. Accidentally finding stray paragraphs in"}, - {"source code would lead to a bad reading experience"}, - {"when the text is re-flowed. "}, - {" Let's show this by describing the function fact-", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"orial. Factorial is a simple recursive function "}, - {"which grows very quickly. So quickly, in fact, "}, - {"that the typical C implementation will only work "}, - {"for values less than about 12: "}, - {" ", PNONE}, - {" # Naive implementation in C "}, - {" int factorial(int n) { "}, - {" if (n < 2) "}, - {" return 1; "}, - {" return n * factorial(n - 1); "}, - {" } "}, - {" "}, - {" The C programming language does not have built- ", PSTART, PModel(kLeft, 0, 20, 0, 0)}, - {"in support for detecting integer overflow, so this"}, - {"naive implementation simply returns random values "}, - {"if even a moderate sized n is provided. "}, + {" A typical page of a programming book may contain", PSTART, + PModel(kLeft, 0, 20, 0, 0)}, + {"examples of source code to exemplify an algorithm "}, + {"being described in prose. Such examples should be"}, + {"rendered as lineated text, meaning text with "}, + {"explicit line breaks but without extra inter-line "}, + {"spacing. Accidentally finding stray paragraphs in"}, + {"source code would lead to a bad reading experience"}, + {"when the text is re-flowed. "}, + {" Let's show this by describing the function fact-", PSTART, + PModel(kLeft, 0, 20, 0, 0)}, + {"orial. Factorial is a simple recursive function "}, + {"which grows very quickly. So quickly, in fact, "}, + {"that the typical C implementation will only work "}, + {"for values less than about 12: "}, + {" ", PNONE}, + {" # Naive implementation in C "}, + {" int factorial(int n) { "}, + {" if (n < 2) "}, + {" return 1; "}, + {" return n * factorial(n - 1); "}, + {" } "}, + {" "}, + {" The C programming language does not have built- ", PSTART, + PModel(kLeft, 0, 20, 0, 0)}, + {"in support for detecting integer overflow, so this"}, + {"naive implementation simply returns random values "}, + {"if even a moderate sized n is provided. "}, }; TEST(ParagraphsTest, NotDistractedBySourceCode) { @@ -565,81 +575,103 @@ TEST(ParagraphsTest, NotDistractedBySourceCode) { } const TextAndModel kOldManAndSea[] = { - {"royal palm which are called guano and in it there was a bed, a", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"table, one chair, and a place on the dirt floor to cook with charcoal."}, - {"On the brown walls of the flattened, overlapping leaves of the"}, - {"sturdy fibered guano there was a picture in color of the Sacred"}, - {"Heart of Jesus and another of the Virgin of Cobre. These were"}, - {"relics of his wife. Once there had been a tinted photograph of his"}, - {"wife on the wall but he had taken it down because it made him too"}, - {"lonely to see it and it was on the shelf in the corner under his clean"}, - {"shirt. "}, - {" \"What do you have to eat?\" the boy asked. ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"A pot of yellow rice with fish. Do you want some?\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"No. I will eat at home. Do you want me to make the fire?\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"No. I will make it later on. Or I may eat the rice cold.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"May I take the cast net?\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"Of course.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" There was no cast net and the boy remembered when they had", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"sold it. But they went through this fiction every day. There was no"}, - {"pot of yellow rice and fish and the boy knew this too. "}, - {" \"Eighty-five is a lucky number,\" the old man said. \"How", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"would you like to see me bring one in that dressed out over a thou-"}, - {"sand pounds? "}, - {" \"I'll get the cast net and go for sardines. Will you sit in the sun", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"in the doorway?\" "}, - {" \"Yes. I have yesterday's paper and I will read the baseball.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" The boy did not know whether yesterday's paper was a fiction", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"too. But the old man brought it out from under the bed. "}, - {" \"Pedrico gave it to me at the bodega,\" he explained. ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"I'll be back when I have the sardines. I'll keep yours and mine", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"together on ice and we can share them in the morning. When I"}, - {"come back you can tell me about the baseball.\" "}, - {" \"The Yankees cannot lose.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"But I fear the Indians of Cleveland.\" ", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {" \"Have faith in the Yankees my son. Think of the great Di-", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"Maggio.\" "}, - {" \"I fear both the Tigers of Detroit and the Indians of Cleve-", PSTART, PModel(kLeft, 0, 50, 0, 0)}, - {"land.\" "} -}; + {"royal palm which are called guano and in it there was a bed, a", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"table, one chair, and a place on the dirt floor to cook with charcoal."}, + {"On the brown walls of the flattened, overlapping leaves of the"}, + {"sturdy fibered guano there was a picture in color of the Sacred"}, + {"Heart of Jesus and another of the Virgin of Cobre. These were"}, + {"relics of his wife. Once there had been a tinted photograph of his"}, + {"wife on the wall but he had taken it down because it made him too"}, + {"lonely to see it and it was on the shelf in the corner under his clean"}, + {"shirt. "}, + {" \"What do you have to eat?\" the boy asked. ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"A pot of yellow rice with fish. Do you want some?\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"No. I will eat at home. Do you want me to make the fire?\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"No. I will make it later on. Or I may eat the rice cold.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"May I take the cast net?\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"Of course.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" There was no cast net and the boy remembered when they had", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"sold it. But they went through this fiction every day. There was no"}, + {"pot of yellow rice and fish and the boy knew this too. " + " "}, + {" \"Eighty-five is a lucky number,\" the old man said. \"How", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"would you like to see me bring one in that dressed out over a " + "thou-"}, + {"sand pounds? " + " "}, + {" \"I'll get the cast net and go for sardines. Will you sit in the " + "sun", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"in the doorway?\" " + " "}, + {" \"Yes. I have yesterday's paper and I will read the baseball.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" The boy did not know whether yesterday's paper was a fiction", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"too. But the old man brought it out from under the bed. "}, + {" \"Pedrico gave it to me at the bodega,\" he explained. " + " ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"I'll be back when I have the sardines. I'll keep yours and mine", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"together on ice and we can share them in the morning. When I"}, + {"come back you can tell me about the baseball.\" "}, + {" \"The Yankees cannot lose.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"But I fear the Indians of Cleveland.\" ", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {" \"Have faith in the Yankees my son. Think of the great Di-", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"Maggio.\" "}, + {" \"I fear both the Tigers of Detroit and the Indians of Cleve-", + PSTART, PModel(kLeft, 0, 50, 0, 0)}, + {"land.\" "}}; TEST(ParagraphsTest, NotOverlyAggressiveWithBlockQuotes) { TestParagraphDetection(kOldManAndSea, ABSL_ARRAYSIZE(kOldManAndSea)); } const TextAndModel kNewZealandIndex[] = { - {"Oats, 51 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"O'Brien, Gregory, 175 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Occupational composition, 110,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {" 138 "}, - {"OECD rankings, 155, 172 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Okiato (original capital), 47 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Oil shock: 1974, xxx, 143; 1979,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {" 145 "}, - {"Old Age Pensions, xxii, 89-90 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Old World evils, 77 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Oliver, W. H., 39, 77, 89 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Olssen, Erik, 45, 64, 84 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Olympic Games, 1924, 111, 144 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Once on Chunuk Bair, 149 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Once Were Warriors, xxxiii, 170", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"On—shore whaling, xvi ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Opotiki, xix ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Orakau battle of, xviii, 57 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"O’Regan, Tipene, 170, 198-99 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Organic agriculture, 177 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Orwell, George, 151 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago, xvii, 45, 49-50, 70 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago block, xvii ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Daily Times, 67 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Girls’ High School, xix, 61,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {" 85 "}, - {"Otago gold rushes, 61-63 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Peninsula, xx ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otago Provincial Council, 68 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Otaki, 33 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, - {"Owls Do Cry, 139 ", PSTART, PModel(kLeft, 0, 0, 30, 0)} -}; + {"Oats, 51 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"O'Brien, Gregory, 175 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Occupational composition, 110,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {" 138 "}, + {"OECD rankings, 155, 172 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Okiato (original capital), 47 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Oil shock: 1974, xxx, 143; 1979,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {" 145 "}, + {"Old Age Pensions, xxii, 89-90 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Old World evils, 77 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Oliver, W. H., 39, 77, 89 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Olssen, Erik, 45, 64, 84 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Olympic Games, 1924, 111, 144 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Once on Chunuk Bair, 149 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Once Were Warriors, xxxiii, 170", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"On—shore whaling, xvi ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Opotiki, xix ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Orakau battle of, xviii, 57 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"O’Regan, Tipene, 170, 198-99 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Organic agriculture, 177 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Orwell, George, 151 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otago, xvii, 45, 49-50, 70 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otago block, xvii ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otago Daily Times, 67 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otago Girls’ High School, xix, 61,", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {" 85 "}, + {"Otago gold rushes, 61-63 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otago Peninsula, xx ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otago Provincial Council, 68 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Otaki, 33 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}, + {"Owls Do Cry, 139 ", PSTART, PModel(kLeft, 0, 0, 30, 0)}}; TEST(ParagraphsTest, IndexPageTest) { TestParagraphDetection(kNewZealandIndex, ABSL_ARRAYSIZE(kNewZealandIndex)); diff --git a/unittest/params_model_test.cc b/unittest/params_model_test.cc index b01f9ebbd7..7b8ba84795 100644 --- a/unittest/params_model_test.cc +++ b/unittest/params_model_test.cc @@ -10,15 +10,14 @@ namespace { class ParamsModelTest : public testing::Test { protected: string TestDataNameToPath(const string& name) const { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string OutputNameToPath(const string& name) const { return file::JoinPath(FLAGS_test_tmpdir, name); } // Test that we are able to load a params model, save it, reload it, // and verify that the re-serialized version is the same as the original. - void TestParamsModelRoundTrip(const string ¶ms_model_filename) const { + void TestParamsModelRoundTrip(const string& params_model_filename) const { tesseract::ParamsModel orig_model; tesseract::ParamsModel duplicate_model; string orig_file = TestDataNameToPath(params_model_filename); @@ -26,7 +25,7 @@ class ParamsModelTest : public testing::Test { EXPECT_TRUE(orig_model.LoadFromFile("eng", orig_file.c_str())); EXPECT_TRUE(orig_model.SaveToFile(out_file.c_str())); - + EXPECT_TRUE(duplicate_model.LoadFromFile("eng", out_file.c_str())); EXPECT_TRUE(orig_model.Equivalent(duplicate_model)); } diff --git a/unittest/progress_test.cc b/unittest/progress_test.cc index b999b2c581..b5c5b37d1f 100644 --- a/unittest/progress_test.cc +++ b/unittest/progress_test.cc @@ -16,146 +16,146 @@ // expects clone of tessdata_fast repo in ../../tessdata_fast -#include "include_gunit.h" -#include "gmock/gmock.h" -#include "baseapi.h" -#include "ocrclass.h" -#include "leptonica/allheaders.h" -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include "baseapi.h" +#include "gmock/gmock.h" +#include "include_gunit.h" +#include "leptonica/allheaders.h" +#include "ocrclass.h" namespace { class QuickTest : public testing::Test { protected: - virtual void SetUp() { - start_time_ = time(nullptr); - } + virtual void SetUp() { start_time_ = time(nullptr); } virtual void TearDown() { const time_t end_time = time(nullptr); - EXPECT_TRUE(end_time - start_time_ <=25) << "The test took too long - " << ::testing::PrintToString(end_time - start_time_); + EXPECT_TRUE(end_time - start_time_ <= 25) + << "The test took too long - " + << ::testing::PrintToString(end_time - start_time_); } time_t start_time_; - }; - - class ClassicMockProgressSink { - public: - MOCK_METHOD1(classicProgress, bool( int ) ); - MOCK_METHOD1(cancel, bool( int )); - - ETEXT_DESC monitor; - - ClassicMockProgressSink() - { - monitor.progress_callback = []( int progress, int, int, int, int ) ->bool { - return instance->classicProgress( progress ); - }; - monitor.cancel = []( void* ths, int words ) -> bool { - return ((ClassicMockProgressSink*)ths)->cancel(words); - }; - monitor.cancel_this = this; - instance = this; - } - - static ClassicMockProgressSink* instance; - }; - - ClassicMockProgressSink* ClassicMockProgressSink::instance = nullptr; - - class NewMockProgressSink : public ClassicMockProgressSink { - public: - MOCK_METHOD1(progress, bool( int ) ); - - NewMockProgressSink() - { - monitor.progress_callback2 = [](ETEXT_DESC* ths, int, int, int, int ) -> bool { - return ((NewMockProgressSink*)ths->cancel_this)->progress( ths->progress ); - }; - } - }; - - void ClassicProgressTester(const char* imgname, const char* tessdatadir, const char* lang) { - using ::testing::_; - using ::testing::AllOf; - using ::testing::AtLeast; - using ::testing::DoAll; - using ::testing::Gt; - using ::testing::Le; - using ::testing::Return; - using ::testing::SaveArg; - - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); - ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); - ASSERT_TRUE(image != nullptr) << "Failed to read test image."; - api->SetImage(image); - - ClassicMockProgressSink progressSink; - - int currentProgress = -1; - EXPECT_CALL( progressSink, classicProgress(AllOf(Gt(currentProgress),Le(100))) ) - .Times(AtLeast(5)) - .WillRepeatedly( DoAll(SaveArg<0>(¤tProgress), - Return(false) )); - EXPECT_CALL( progressSink, cancel(_) ) - .Times(AtLeast(5)) - .WillRepeatedly(Return(false)); - - EXPECT_EQ( api->Recognize( &progressSink.monitor ), false ); - EXPECT_GE( currentProgress, 50 ) << "The reported progress did not reach 50%"; - - api->End(); - pixDestroy(&image); +}; + +class ClassicMockProgressSink { + public: + MOCK_METHOD1(classicProgress, bool(int)); + MOCK_METHOD1(cancel, bool(int)); + + ETEXT_DESC monitor; + + ClassicMockProgressSink() { + monitor.progress_callback = [](int progress, int, int, int, int) -> bool { + return instance->classicProgress(progress); + }; + monitor.cancel = [](void* ths, int words) -> bool { + return ((ClassicMockProgressSink*)ths)->cancel(words); + }; + monitor.cancel_this = this; + instance = this; } - void NewProgressTester(const char* imgname, const char* tessdatadir, const char* lang) { - using ::testing::_; - using ::testing::AllOf; - using ::testing::AtLeast; - using ::testing::DoAll; - using ::testing::Gt; - using ::testing::Le; - using ::testing::Return; - using ::testing::SaveArg; - - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); - ASSERT_FALSE(api->Init(tessdatadir, lang)) << "Could not initialize tesseract."; - Pix *image = pixRead(imgname); - ASSERT_TRUE(image != nullptr) << "Failed to read test image."; - api->SetImage(image); - - NewMockProgressSink progressSink; - - int currentProgress = -1; - EXPECT_CALL( progressSink, classicProgress(_) ) - .Times(0); - EXPECT_CALL( progressSink, progress(AllOf(Gt(currentProgress),Le(100))) ) - .Times(AtLeast(5)) - .WillRepeatedly( DoAll(SaveArg<0>(¤tProgress), - Return(false) )); - EXPECT_CALL( progressSink, cancel(_) ) - .Times(AtLeast(5)) - .WillRepeatedly(Return(false)); - - EXPECT_EQ( api->Recognize( &progressSink.monitor ), false ); - EXPECT_GE( currentProgress, 50 ) << "The reported progress did not reach 50%"; - - api->End(); - pixDestroy(&image); - } + static ClassicMockProgressSink* instance; +}; - TEST(QuickTest, ClassicProgressReporitng) { - ClassicProgressTester(TESTING_DIR "/phototest.tif", - TESSDATA_DIR "_fast", "eng"); - } +ClassicMockProgressSink* ClassicMockProgressSink::instance = nullptr; + +class NewMockProgressSink : public ClassicMockProgressSink { + public: + MOCK_METHOD1(progress, bool(int)); - TEST(QuickTest, NewProgressReporitng) { - NewProgressTester(TESTING_DIR "/phototest.tif", - TESSDATA_DIR "_fast", "eng"); + NewMockProgressSink() { + monitor.progress_callback2 = [](ETEXT_DESC* ths, int, int, int, + int) -> bool { + return ((NewMockProgressSink*)ths->cancel_this)->progress(ths->progress); + }; } +}; + +void ClassicProgressTester(const char* imgname, const char* tessdatadir, + const char* lang) { + using ::testing::_; + using ::testing::AllOf; + using ::testing::AtLeast; + using ::testing::DoAll; + using ::testing::Gt; + using ::testing::Le; + using ::testing::Return; + using ::testing::SaveArg; + + tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); + ASSERT_FALSE(api->Init(tessdatadir, lang)) + << "Could not initialize tesseract."; + Pix* image = pixRead(imgname); + ASSERT_TRUE(image != nullptr) << "Failed to read test image."; + api->SetImage(image); + + ClassicMockProgressSink progressSink; + + int currentProgress = -1; + EXPECT_CALL(progressSink, + classicProgress(AllOf(Gt(currentProgress), Le(100)))) + .Times(AtLeast(5)) + .WillRepeatedly(DoAll(SaveArg<0>(¤tProgress), Return(false))); + EXPECT_CALL(progressSink, cancel(_)) + .Times(AtLeast(5)) + .WillRepeatedly(Return(false)); + + EXPECT_EQ(api->Recognize(&progressSink.monitor), false); + EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%"; + + api->End(); + pixDestroy(&image); +} + +void NewProgressTester(const char* imgname, const char* tessdatadir, + const char* lang) { + using ::testing::_; + using ::testing::AllOf; + using ::testing::AtLeast; + using ::testing::DoAll; + using ::testing::Gt; + using ::testing::Le; + using ::testing::Return; + using ::testing::SaveArg; + + tesseract::TessBaseAPI* api = new tesseract::TessBaseAPI(); + ASSERT_FALSE(api->Init(tessdatadir, lang)) + << "Could not initialize tesseract."; + Pix* image = pixRead(imgname); + ASSERT_TRUE(image != nullptr) << "Failed to read test image."; + api->SetImage(image); + + NewMockProgressSink progressSink; + + int currentProgress = -1; + EXPECT_CALL(progressSink, classicProgress(_)).Times(0); + EXPECT_CALL(progressSink, progress(AllOf(Gt(currentProgress), Le(100)))) + .Times(AtLeast(5)) + .WillRepeatedly(DoAll(SaveArg<0>(¤tProgress), Return(false))); + EXPECT_CALL(progressSink, cancel(_)) + .Times(AtLeast(5)) + .WillRepeatedly(Return(false)); + + EXPECT_EQ(api->Recognize(&progressSink.monitor), false); + EXPECT_GE(currentProgress, 50) << "The reported progress did not reach 50%"; + + api->End(); + pixDestroy(&image); +} + +TEST(QuickTest, ClassicProgressReporitng) { + ClassicProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", + "eng"); +} + +TEST(QuickTest, NewProgressReporitng) { + NewProgressTester(TESTING_DIR "/phototest.tif", TESSDATA_DIR "_fast", "eng"); +} } // namespace diff --git a/unittest/qrsequence_test.cc b/unittest/qrsequence_test.cc index 90fa6a2240..4d748caf60 100644 --- a/unittest/qrsequence_test.cc +++ b/unittest/qrsequence_test.cc @@ -18,14 +18,13 @@ class TestableQRSequenceGenerator : public QRSequenceGenerator { TEST(QRSequenceGenerator, GetBinaryReversedInteger) { const int kRangeSize = 8; TestableQRSequenceGenerator generator(kRangeSize); - int reversed_vals[kRangeSize] = { 0, 4, 2, 6, 1, 5, 3, 7}; + int reversed_vals[kRangeSize] = {0, 4, 2, 6, 1, 5, 3, 7}; for (int i = 0; i < kRangeSize; ++i) EXPECT_EQ(reversed_vals[i], generator.GetBinaryReversedInteger(i)); } // Trivial test fixture for a parameterized test. -class QRSequenceGeneratorTest : public ::testing::TestWithParam { -}; +class QRSequenceGeneratorTest : public ::testing::TestWithParam {}; TEST_P(QRSequenceGeneratorTest, GeneratesValidSequence) { const int kRangeSize = GetParam(); @@ -33,8 +32,7 @@ TEST_P(QRSequenceGeneratorTest, GeneratesValidSequence) { std::vector vals(kRangeSize); CycleTimer timer; timer.Restart(); - for (int i = 0; i < kRangeSize; ++i) - vals[i] = generator.GetVal(); + for (int i = 0; i < kRangeSize; ++i) vals[i] = generator.GetVal(); LOG(INFO) << kRangeSize << "-length sequence took " << timer.Get() * 1e3 << "ms"; // Sort the numbers to verify that we've covered the range without repetition. diff --git a/unittest/recodebeam_test.cc b/unittest/recodebeam_test.cc index f763216ca3..874938e6d6 100644 --- a/unittest/recodebeam_test.cc +++ b/unittest/recodebeam_test.cc @@ -10,10 +10,10 @@ using tesseract::CCUtil; using tesseract::Dict; -using tesseract::RecodedCharID; +using tesseract::PointerVector; using tesseract::RecodeBeamSearch; +using tesseract::RecodedCharID; using tesseract::RecodeNode; -using tesseract::PointerVector; using tesseract::TRand; using tesseract::UnicharCompress; @@ -59,13 +59,11 @@ class RecodeBeamTest : public ::testing::Test { // Loads and compresses the given unicharset. void LoadUnicharset(const string& unicharset_name) { - string radical_stroke_file = - file::JoinPath(FLAGS_test_srcdir, - "tesseract/training" - "/langdata/radical-stroke.txt"); - string unicharset_file = file::JoinPath( - FLAGS_test_srcdir, "testdata", - unicharset_name); + string radical_stroke_file = file::JoinPath(FLAGS_test_srcdir, + "tesseract/training" + "/langdata/radical-stroke.txt"); + string unicharset_file = + file::JoinPath(FLAGS_test_srcdir, "testdata", unicharset_name); string uni_data; CHECK_OK(file::GetContents(unicharset_file, &uni_data, file::Defaults())); string radical_data; @@ -94,9 +92,8 @@ class RecodeBeamTest : public ::testing::Test { // Loads the dictionary. void LoadDict(const string& lang) { string traineddata_name = lang + ".traineddata"; - string traineddata_file = file::JoinPath( - FLAGS_test_srcdir, "testdata", - traineddata_name); + string traineddata_file = + file::JoinPath(FLAGS_test_srcdir, "testdata", traineddata_name); lstm_dict_.SetupForLoad(NULL); tesseract::TessdataManager mgr; mgr.Init(traineddata_file.c_str()); @@ -140,8 +137,8 @@ class RecodeBeamTest : public ::testing::Test { code.length() < RecodedCharID::kMaxCodeLen && (uni_id == INVALID_UNICHAR_ID || !recoder_.IsValidFirstCode(labels[index]))); - EXPECT_NE(INVALID_UNICHAR_ID, uni_id) << "index=" << index << "/" - << labels.size(); + EXPECT_NE(INVALID_UNICHAR_ID, uni_id) + << "index=" << index << "/" << labels.size(); // To the extent of truth_utf8, we expect decoded to match, but if // transcription is shorter, that is OK too, as we may just be testing // that we get a valid sequence when padded with random data. diff --git a/unittest/rect_test.cc b/unittest/rect_test.cc index cefaaba9c4..069ca7dea2 100644 --- a/unittest/rect_test.cc +++ b/unittest/rect_test.cc @@ -16,12 +16,10 @@ namespace { class TBOXTest : public testing::Test { -public: - void SetUp() { - } + public: + void SetUp() {} - void TearDown() { - } + void TearDown() {} }; TEST_F(TBOXTest, OverlapInside) { @@ -56,10 +54,8 @@ TEST_F(TBOXTest, OverlapFractionCorners) { mid.overlap_fraction(bottom_left)); EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0), bottom_left.overlap_fraction(mid)); - EXPECT_DOUBLE_EQ((5.0 * 5.0) / (20.0 * 20.0), - mid.overlap_fraction(top_left)); - EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0), - top_left.overlap_fraction(mid)); + EXPECT_DOUBLE_EQ((5.0 * 5.0) / (20.0 * 20.0), mid.overlap_fraction(top_left)); + EXPECT_DOUBLE_EQ((5.0 * 5.0) / (10.0 * 10.0), top_left.overlap_fraction(mid)); } TEST_F(TBOXTest, OverlapBoolSides) { @@ -175,4 +171,4 @@ TEST_F(TBOXTest, OverlapYFractionZeroSize) { EXPECT_DOUBLE_EQ(0.0, small.y_overlap_fraction(zero)); } -} // namespace +} // namespace diff --git a/unittest/resultiterator_test.cc b/unittest/resultiterator_test.cc index e420a3245e..5ec8bc2d8d 100644 --- a/unittest/resultiterator_test.cc +++ b/unittest/resultiterator_test.cc @@ -12,18 +12,18 @@ DEFINE_string(tess_config, "", "config file for tesseract"); DEFINE_bool(visual_test, false, "Runs a visual test using scrollview"); using tesseract::PageIterator; -using tesseract::ResultIterator; using tesseract::PageIteratorLevel; +using tesseract::ResultIterator; // Helper functions for converting to STL vectors -template -void ToVector(const GenericVector &from, std::vector *to) { +template +void ToVector(const GenericVector& from, std::vector* to) { to->clear(); for (int i = 0; i < from.size(); i++) to->push_back(from[i]); } -template -void ToVector(const GenericVectorEqEq &from, std::vector *to) { +template +void ToVector(const GenericVectorEqEq& from, std::vector* to) { to->clear(); for (int i = 0; i < from.size(); i++) to->push_back(from[i]); } @@ -32,22 +32,17 @@ void ToVector(const GenericVectorEqEq &from, std::vector *to) { class ResultIteratorTest : public testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessdataPath() { - return file::JoinPath(FLAGS_test_srcdir, - "tessdata"); + return file::JoinPath(FLAGS_test_srcdir, "tessdata"); } string OutputNameToPath(const string& name) { return file::JoinPath(FLAGS_test_tmpdir, name); } - ResultIteratorTest() { - src_pix_ = NULL; - } - ~ResultIteratorTest() { - } + ResultIteratorTest() { src_pix_ = NULL; } + ~ResultIteratorTest() {} void SetImage(const char* filename) { src_pix_ = pixRead(TestDataNameToPath(filename).c_str()); @@ -63,16 +58,14 @@ class ResultIteratorTest : public testing::Test { // Rebuilds the image using the binary images at the given level, and // EXPECTs that the number of pixels in the xor of the rebuilt image with // the original is at most max_diff. - void VerifyRebuild(int max_diff, - PageIteratorLevel level, PageIterator* it) { + void VerifyRebuild(int max_diff, PageIteratorLevel level, PageIterator* it) { it->Begin(); int width = pixGetWidth(src_pix_); int height = pixGetHeight(src_pix_); int depth = pixGetDepth(src_pix_); Pix* pix = pixCreate(width, height, depth); EXPECT_TRUE(depth == 1 || depth == 8); - if (depth == 8) - pixSetAll(pix); + if (depth == 8) pixSetAll(pix); do { int left, top, right, bottom; PageIteratorLevel im_level = level; @@ -81,8 +74,8 @@ class ResultIteratorTest : public testing::Test { im_level = tesseract::RIL_BLOCK; EXPECT_TRUE(it->BoundingBox(im_level, &left, &top, &right, &bottom)); } - VLOG(1) << "BBox: [L:" << left << ", T:" << top - << ", R:" << right << ", B:" << bottom << "]"; + VLOG(1) << "BBox: [L:" << left << ", T:" << top << ", R:" << right + << ", B:" << bottom << "]"; Pix* block_pix; if (depth == 1) { block_pix = it->GetBinaryImage(im_level); @@ -90,9 +83,9 @@ class ResultIteratorTest : public testing::Test { PIX_SRC ^ PIX_DST, block_pix, 0, 0); } else { block_pix = it->GetImage(im_level, 2, src_pix_, &left, &top); - pixRasterop(pix, left, top, - pixGetWidth(block_pix), pixGetHeight(block_pix), - PIX_SRC & PIX_DST, block_pix, 0, 0); + pixRasterop(pix, left, top, pixGetWidth(block_pix), + pixGetHeight(block_pix), PIX_SRC & PIX_DST, block_pix, 0, + 0); } CHECK(block_pix != nullptr); pixDestroy(&block_pix); @@ -123,8 +116,7 @@ class ResultIteratorTest : public testing::Test { // Rebuilds the text from the iterator strings at the given level, and // EXPECTs that the rebuild string exactly matches the truth string. - void VerifyIteratorText(const string& truth, - PageIteratorLevel level, + void VerifyIteratorText(const string& truth, PageIteratorLevel level, ResultIterator* it) { VLOG(1) << "Text Test Level " << level; it->Begin(); @@ -132,7 +124,7 @@ class ResultIteratorTest : public testing::Test { do { char* text = it->GetUTF8Text(level); result += text; - delete [] text; + delete[] text; if ((level == tesseract::RIL_WORD || level == tesseract::RIL_SYMBOL) && it->IsAtFinalElement(tesseract::RIL_WORD, level)) { if (it->IsAtFinalElement(tesseract::RIL_TEXTLINE, level)) { @@ -140,8 +132,7 @@ class ResultIteratorTest : public testing::Test { } else { result += ' '; } - if (it->IsAtFinalElement(tesseract::RIL_PARA, level)) - result += '\n'; + if (it->IsAtFinalElement(tesseract::RIL_PARA, level)) result += '\n'; } } while (it->Next(level)); EXPECT_STREQ(truth.c_str(), result.c_str()) @@ -170,9 +161,10 @@ class ResultIteratorTest : public testing::Test { // expected output reading order // (expected_reading_order[num_reading_order_entries]) and a given reading // context (ltr or rtl). - void ExpectTextlineReadingOrder( - bool in_ltr_context, StrongScriptDirection *word_dirs, int num_words, - int *expected_reading_order, int num_reading_order_entries) const { + void ExpectTextlineReadingOrder(bool in_ltr_context, + StrongScriptDirection* word_dirs, + int num_words, int* expected_reading_order, + int num_reading_order_entries) const { GenericVector gv_word_dirs; for (int i = 0; i < num_words; i++) { gv_word_dirs.push_back(word_dirs[i]); @@ -195,7 +187,7 @@ class ResultIteratorTest : public testing::Test { // Sane means that the output contains some permutation of the indices // 0..[num_words - 1] interspersed optionally with negative (marker) values. void VerifySaneTextlineOrder(bool in_ltr_context, - StrongScriptDirection *word_dirs, + StrongScriptDirection* word_dirs, int num_words) const { GenericVector gv_word_dirs; for (int i = 0; i < num_words; i++) { @@ -235,7 +227,6 @@ class ResultIteratorTest : public testing::Test { tesseract::TessBaseAPI api_; }; - // Tests layout analysis output (and scrollview) on the UNLV page numbered // 8087_054.3G.tif. (Dubrovnik), but only if --visual_test is true. TEST_F(ResultIteratorTest, VisualTest) { @@ -249,8 +240,8 @@ TEST_F(ResultIteratorTest, VisualTest) { // Make a scrollview window for the display. int width = pixGetWidth(src_pix_); int height = pixGetHeight(src_pix_); - ScrollView* win = new ScrollView(kIms[i], 100, 100, - width / 2, height / 2, width, height); + ScrollView* win = + new ScrollView(kIms[i], 100, 100, width / 2, height / 2, width, height); win->Image(src_pix_, 0, 0); it->Begin(); ScrollView::Color color = ScrollView::RED; @@ -296,7 +287,7 @@ TEST_F(ResultIteratorTest, EasyTest) { char* result = api_.GetUTF8Text(); ocr_text_ = result; - delete [] result; + delete[] result; ResultIterator* r_it = api_.GetIterator(); // The images should rebuild almost perfectly. LOG(INFO) << "Verifying image rebuilds 2a (resultiterator)"; @@ -330,15 +321,15 @@ TEST_F(ResultIteratorTest, EasyTest) { do { bool bold, italic, underlined, monospace, serif, smallcaps; int pointsize, font_id; - const char* font = r_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); + const char* font = + r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, + &serif, &smallcaps, &pointsize, &font_id); float confidence = r_it->Confidence(tesseract::RIL_WORD); EXPECT_GE(confidence, 80.0f); char* word_str = r_it->GetUTF8Text(tesseract::RIL_WORD); VLOG(1) << StringPrintf("Word %s in font %s, id %d, size %d, conf %g", word_str, font, font_id, pointsize, confidence); - delete [] word_str; + delete[] word_str; EXPECT_FALSE(bold); EXPECT_FALSE(italic); EXPECT_FALSE(underlined); @@ -379,7 +370,7 @@ TEST_F(ResultIteratorTest, GreyTest) { TEST_F(ResultIteratorTest, SmallCapDropCapTest) { SetImage("8071_093.3B.tif"); char* result = api_.GetUTF8Text(); - delete [] result; + delete[] result; ResultIterator* r_it = api_.GetIterator(); // Iterate over the words. int found_dropcaps = 0; @@ -388,26 +379,23 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) { do { bool bold, italic, underlined, monospace, serif, smallcaps; int pointsize, font_id; - r_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); + r_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif, + &smallcaps, &pointsize, &font_id); char* word_str = r_it->GetUTF8Text(tesseract::RIL_WORD); if (word_str != NULL) { - VLOG(1) << StringPrintf("Word %s is %s", - word_str, smallcaps ? "Smallcaps" : "Normal"); + VLOG(1) << StringPrintf("Word %s is %s", word_str, + smallcaps ? "Smallcaps" : "Normal"); if (r_it->SymbolIsDropcap()) { ++found_dropcaps; } - if (strcmp(word_str, "SHE") == 0 || - strcmp(word_str, "MOPED") == 0 || + if (strcmp(word_str, "SHE") == 0 || strcmp(word_str, "MOPED") == 0 || strcmp(word_str, "RALPH") == 0 || strcmp(word_str, "KINNEY") == 0 || // Not working yet. strcmp(word_str, "BENNETT") == 0) { EXPECT_TRUE(smallcaps) << word_str; ++found_smallcaps; } else { - if (smallcaps) - ++false_positives; + if (smallcaps) ++false_positives; } // No symbol other than the first of any word should be dropcap. ResultIterator s_it(*r_it); @@ -415,13 +403,13 @@ TEST_F(ResultIteratorTest, SmallCapDropCapTest) { !s_it.IsAtBeginningOf(tesseract::RIL_WORD)) { if (s_it.SymbolIsDropcap()) { char* sym_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL); - LOG(ERROR) << StringPrintf("Symbol %s of word %s is dropcap", - sym_str, word_str); - delete [] sym_str; + LOG(ERROR) << StringPrintf("Symbol %s of word %s is dropcap", sym_str, + word_str); + delete[] sym_str; } EXPECT_FALSE(s_it.SymbolIsDropcap()); } - delete [] word_str; + delete[] word_str; } } while (r_it->Next(tesseract::RIL_WORD)); delete r_it; @@ -486,12 +474,13 @@ static const StrongScriptDirection dZ = DIR_MIX; // interpreted appropriately in different contexts. TEST_F(ResultIteratorTest, DualStartTextlineOrderTest) { StrongScriptDirection word_dirs[] = {dL, dL, dN, dL, dN, dR, dR, dR}; - int reading_order_rtl_context[] = { - 7, 6, 5, 4, ResultIterator::kMinorRunStart, 0, 1, 2, 3, - ResultIterator::kMinorRunEnd}; - int reading_order_ltr_context[] = { - 0, 1, 2, 3, 4, ResultIterator::kMinorRunStart, 7, 6, 5, - ResultIterator::kMinorRunEnd}; + int reading_order_rtl_context[] = {7, 6, 5, 4, ResultIterator::kMinorRunStart, + 0, 1, 2, 3, ResultIterator::kMinorRunEnd}; + int reading_order_ltr_context[] = {0, 1, + 2, 3, + 4, ResultIterator::kMinorRunStart, + 7, 6, + 5, ResultIterator::kMinorRunEnd}; ExpectTextlineReadingOrder(true, word_dirs, ABSL_ARRAYSIZE(word_dirs), reading_order_ltr_context, @@ -510,8 +499,8 @@ TEST_F(ResultIteratorTest, LeftwardTextlineOrderTest) { // In the strange event that this shows up in an RTL paragraph, nonetheless // just presume the whole thing is an LTR line. int reading_order_rtl_context[] = { - ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7, - ResultIterator::kMinorRunEnd}; + ResultIterator::kMinorRunStart, 0, 1, 2, 3, 4, 5, 6, 7, + ResultIterator::kMinorRunEnd}; ExpectTextlineReadingOrder(true, word_dirs, ABSL_ARRAYSIZE(word_dirs), reading_order_ltr_context, @@ -553,7 +542,7 @@ TEST_F(ResultIteratorTest, TextlineOrderSanityCheck) { TEST_F(ResultIteratorTest, NonNullChoicesTest) { SetImage("5318c4b679264.jpg"); char* result = api_.GetUTF8Text(); - delete [] result; + delete[] result; ResultIterator* r_it = api_.GetIterator(); // Iterate over the words. do { @@ -571,10 +560,10 @@ TEST_F(ResultIteratorTest, NonNullChoicesTest) { VLOG(1) << "Char choice " << char_str; CHECK(char_str != nullptr); } while (c_it.Next()); - } while (!s_it.IsAtFinalElement(tesseract::RIL_WORD, - tesseract::RIL_SYMBOL) && - s_it.Next(tesseract::RIL_SYMBOL)); - delete [] word_str; + } while ( + !s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) && + s_it.Next(tesseract::RIL_SYMBOL)); + delete[] word_str; } } while (r_it->Next(tesseract::RIL_WORD)); delete r_it; @@ -586,7 +575,7 @@ TEST_F(ResultIteratorTest, NonNullConfidencesTest) { // Force recognition so we can used the result iterator. // We don't care about the return from GetUTF8Text. char* result = api_.GetUTF8Text(); - delete [] result; + delete[] result; ResultIterator* r_it = api_.GetIterator(); // Iterate over the words. do { @@ -599,13 +588,13 @@ TEST_F(ResultIteratorTest, NonNullConfidencesTest) { const char* char_str = s_it.GetUTF8Text(tesseract::RIL_SYMBOL); CHECK(char_str != nullptr); float confidence = s_it.Confidence(tesseract::RIL_SYMBOL); - VLOG(1) << StringPrintf("Char %s has confidence %g\n", - char_str, confidence); - delete [] char_str; - } while (!s_it.IsAtFinalElement(tesseract::RIL_WORD, - tesseract::RIL_SYMBOL) && - s_it.Next(tesseract::RIL_SYMBOL)); - delete [] word_str; + VLOG(1) << StringPrintf("Char %s has confidence %g\n", char_str, + confidence); + delete[] char_str; + } while ( + !s_it.IsAtFinalElement(tesseract::RIL_WORD, tesseract::RIL_SYMBOL) && + s_it.Next(tesseract::RIL_SYMBOL)); + delete[] word_str; } else { VLOG(1) << "Empty word found"; } diff --git a/unittest/scanutils_test.cc b/unittest/scanutils_test.cc index 9237103f8c..5d14924eae 100644 --- a/unittest/scanutils_test.cc +++ b/unittest/scanutils_test.cc @@ -7,8 +7,7 @@ namespace { class ScanutilsTest : public ::testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } }; @@ -32,15 +31,13 @@ TEST_F(ScanutilsTest, DoesScanf) { int r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]); int r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]); EXPECT_EQ(r1, r2); - for (int i = 0; i < kNumFloats; ++i) - EXPECT_FLOAT_EQ(f1[i], f2[i]); + for (int i = 0; i < kNumFloats; ++i) EXPECT_FLOAT_EQ(f1[i], f2[i]); const int kNumInts = 5; int i1[kNumInts], i2[kNumInts]; r1 = fscanf(fp1, "%d %d %d %d %i", &i1[0], &i1[1], &i1[2], &i1[3], &i1[4]); r2 = tfscanf(fp2, "%d %d %d %d %i", &i2[0], &i2[1], &i2[2], &i2[3], &i2[4]); EXPECT_EQ(r1, r2); - for (int i = 0; i < kNumInts; ++i) - EXPECT_EQ(i1[i], i2[i]); + for (int i = 0; i < kNumInts; ++i) EXPECT_EQ(i1[i], i2[i]); const int kStrLen = 1024; char s1[kStrLen]; char s2[kStrLen]; @@ -68,11 +65,10 @@ TEST_F(ScanutilsTest, DoesScanf) { r1 = fscanf(fp1, "%f %f %f %f", &f1[0], &f1[1], &f1[2], &f1[3]); r2 = tfscanf(fp2, "%f %f %f %f", &f2[0], &f2[1], &f2[2], &f2[3]); EXPECT_EQ(r1, r2); - for (int i = 0; i < kNumFloats; ++i) - EXPECT_FLOAT_EQ(f1[i], f2[i]); + for (int i = 0; i < kNumFloats; ++i) EXPECT_FLOAT_EQ(f1[i], f2[i]); // Test the * for field suppression. r1 = fscanf(fp1, "%d %*s %*d %*f %*f", &i1[0]); - r2 = tfscanf(fp2,"%d %*s %*d %*f %*f", &i2[0]); + r2 = tfscanf(fp2, "%d %*s %*d %*f %*f", &i2[0]); EXPECT_EQ(r1, r2); EXPECT_EQ(i1[0], i2[0]); // We should still see the next value and no phantoms. @@ -84,4 +80,3 @@ TEST_F(ScanutilsTest, DoesScanf) { } } // namespace - diff --git a/unittest/shapetable_test.cc b/unittest/shapetable_test.cc index 86ee5fb4bf..64aa0dd98a 100644 --- a/unittest/shapetable_test.cc +++ b/unittest/shapetable_test.cc @@ -39,8 +39,7 @@ static void Expect352(int font_id, const Shape& shape) { } // The fixture for testing Shape. -class ShapeTest : public testing::Test { -}; +class ShapeTest : public testing::Test {}; // Tests that a Shape works as expected for all the basic functions. TEST_F(ShapeTest, BasicTest) { @@ -97,8 +96,7 @@ TEST_F(ShapeTest, AddShapeTest) { } // The fixture for testing Shape. -class ShapeTableTest : public testing::Test { -}; +class ShapeTableTest : public testing::Test {}; // Tests that a Shape works as expected for all the basic functions. TEST_F(ShapeTableTest, FullTest) { @@ -148,5 +146,3 @@ TEST_F(ShapeTableTest, FullTest) { } } // namespace - - diff --git a/unittest/stats_test.cc b/unittest/stats_test.cc index 771d01c8c2..691af962ed 100644 --- a/unittest/stats_test.cc +++ b/unittest/stats_test.cc @@ -9,15 +9,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "statistc.h" #include "genericvector.h" #include "kdpair.h" +#include "statistc.h" #include "include_gunit.h" namespace { -const int kTestData[] = { 2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1 }; +const int kTestData[] = {2, 0, 12, 1, 1, 2, 10, 1, 0, 0, 0, 2, 0, 4, 1, 1}; class STATSTest : public testing::Test { public: @@ -27,8 +27,7 @@ class STATSTest : public testing::Test { stats_.add(i, kTestData[i]); } - void TearDown() { - } + void TearDown() {} STATS stats_; }; diff --git a/unittest/stridemap_test.cc b/unittest/stridemap_test.cc index 855e43f5ba..2427eba0f9 100644 --- a/unittest/stridemap_test.cc +++ b/unittest/stridemap_test.cc @@ -1,9 +1,9 @@ #include "tesseract/lstm/stridemap.h" -using tesseract::FlexDimensions; using tesseract::FD_BATCH; using tesseract::FD_HEIGHT; using tesseract::FD_WIDTH; +using tesseract::FlexDimensions; using tesseract::StrideMap; namespace { @@ -104,8 +104,8 @@ TEST_F(StridemapTest, Scaling) { // Scale x by 2, keeping y the same. std::vector values_x2 = {0, 1, 4, 5, 8, 9, 12, 13, 17, 18, - 22, 23, 27, 28, 32, 33, 36, 37, 40, 41, - 44, 45, 48, 49, 53, 54, 58, 59}; + 22, 23, 27, 28, 32, 33, 36, 37, 40, 41, + 44, 45, 48, 49, 53, 54, 58, 59}; StrideMap test_map(stride_map); test_map.ScaleXY(2, 1); StrideMap::Index index(test_map); @@ -121,8 +121,8 @@ TEST_F(StridemapTest, Scaling) { test_map = stride_map; // Scale y by 2, keeping x the same. std::vector values_y2 = {0, 1, 2, 3, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 32, 33, 34, 35, - 36, 37, 38, 39, 48, 49, 50, 51, 52}; + 17, 18, 19, 20, 21, 32, 33, 34, 35, + 36, 37, 38, 39, 48, 49, 50, 51, 52}; test_map.ScaleXY(1, 2); index.InitToFirst(); pos = 0; @@ -151,7 +151,7 @@ TEST_F(StridemapTest, Scaling) { test_map = stride_map; // Reduce Width to 1. std::vector values_x_to_1 = {0, 4, 8, 12, 17, 22, 27, - 32, 36, 40, 44, 48, 53, 58}; + 32, 36, 40, 44, 48, 53, 58}; test_map.ReduceWidthTo1(); index.InitToFirst(); pos = 0; diff --git a/unittest/stringrenderer_test.cc b/unittest/stringrenderer_test.cc index 18dfdd86dc..87eb6b5894 100644 --- a/unittest/stringrenderer_test.cc +++ b/unittest/stringrenderer_test.cc @@ -23,9 +23,9 @@ namespace { const char kEngText[] = "the quick brown fox jumps over the lazy dog"; const char kHinText[] = "पिताने विवाह की | हो गई उद्विग्न वह सोचा"; -const char kKorText[] = - "이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는"; -const char kArabicText[] = "والفكر والصراع ، بالتأمل والفهم والتحليل ، " +const char kKorText[] = "이는 것으로 다시 넣을 1234 수는 있지만 선택의 의미는"; +const char kArabicText[] = + "والفكر والصراع ، بالتأمل والفهم والتحليل ، " "بالعلم والفن ، وأخيرا بالضحك أوبالبكاء ، "; const char kMixedText[] = "والفكر 123 والصراع abc"; @@ -40,8 +40,7 @@ class StringRendererTest : public ::testing::Test { protected: static void SetUpTestCase() { l_chooseDisplayProg(L_DISPLAY_WITH_XZGV); - FLAGS_fonts_dir = file::JoinPath( - FLAGS_test_srcdir, "testdata"); + FLAGS_fonts_dir = file::JoinPath(FLAGS_test_srcdir, "testdata"); FLAGS_fontconfig_tmpdir = FLAGS_test_tmpdir; FLAGS_use_only_legacy_fonts = false; // Needed for reliable heapchecking of pango layout structures. @@ -66,7 +65,7 @@ class StringRendererTest : public ::testing::Test { TEST_F(StringRendererTest, DoesRenderToImage) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); EXPECT_TRUE(pix != NULL); @@ -112,7 +111,7 @@ TEST_F(StringRendererTest, DoesRenderToImageWithUnderline) { // Underline all words but NOT intervening spaces. renderer_->set_underline_start_prob(1.0); renderer_->set_underline_continuation_prob(0); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); EXPECT_TRUE(pix != NULL); @@ -147,7 +146,7 @@ TEST_F(StringRendererTest, DoesHandleNewlineCharacters) { const char kRawText[] = "\n\n\n A \nB \nC \n\n\n"; const char kStrippedText[] = " A B C "; // text with newline chars removed renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kRawText), renderer_->RenderToImage(kRawText, strlen(kRawText), &pix)); EXPECT_TRUE(pix != NULL); @@ -167,9 +166,9 @@ TEST_F(StringRendererTest, DoesRenderLigatures) { const char kArabicLigature[] = "لا"; Pix* pix = NULL; - EXPECT_EQ(strlen(kArabicLigature), - renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), - &pix)); + EXPECT_EQ( + strlen(kArabicLigature), + renderer_->RenderToImage(kArabicLigature, strlen(kArabicLigature), &pix)); EXPECT_TRUE(pix != NULL); EXPECT_GT(renderer_->GetBoxes().size(), 0); const std::vector& boxes = renderer_->GetBoxes(); @@ -186,12 +185,10 @@ TEST_F(StringRendererTest, DoesRenderLigatures) { pixDestroy(&pix); } - static int FindBoxCharXCoord(const std::vector& boxchars, const string& ch) { for (int i = 0; i < boxchars.size(); ++i) { - if (boxchars[i]->ch() == ch) - return boxchars[i]->box()->x; + if (boxchars[i]->ch() == ch) return boxchars[i]->box()->x; } return kint32max; } @@ -223,14 +220,14 @@ TEST_F(StringRendererTest, ArabicBoxcharsInLTROrder) { TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { renderer_.reset(new StringRenderer("Arab 10", 600, 600)); - Pix *pix = NULL; + Pix* pix = NULL; // Arabic letters should be in decreasing x-coordinates const char kArabicWord[] = "والفكر"; renderer_->RenderToImage(kArabicWord, strlen(kArabicWord), &pix); EXPECT_GT(renderer_->GetBoxes().size(), 0); const std::vector& boxchars = renderer_->GetBoxes(); for (int i = 0; i < boxchars.size() - 1; ++i) { - EXPECT_GT(boxchars[i]->box()->x, boxchars[i+1]->box()->x) + EXPECT_GT(boxchars[i]->box()->x, boxchars[i + 1]->box()->x) << boxchars[i]->ch(); } pixDestroy(&pix); @@ -241,7 +238,7 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { renderer_->RenderToImage(kEnglishWord, strlen(kEnglishWord), &pix); EXPECT_EQ(boxchars.size(), strlen(kEnglishWord)); for (int i = 0; i < boxchars.size() - 1; ++i) { - EXPECT_LT(boxchars[i]->box()->x, boxchars[i+1]->box()->x) + EXPECT_LT(boxchars[i]->box()->x, boxchars[i + 1]->box()->x) << boxchars[i]->ch(); } pixDestroy(&pix); @@ -255,7 +252,6 @@ TEST_F(StringRendererTest, DoesOutputBoxcharsInReadingOrder) { pixDestroy(&pix); } - TEST_F(StringRendererTest, DoesRenderVerticalText) { Pix* pix = NULL; renderer_.reset(new StringRenderer("UnBatang 10", 600, 600)); @@ -271,7 +267,7 @@ TEST_F(StringRendererTest, DoesRenderVerticalText) { // appropriate page numbers. TEST_F(StringRendererTest, DoesKeepAllImageBoxes) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); - Pix *pix = NULL; + Pix* pix = NULL; int num_boxes_per_page = 0; const int kNumTrials = 2; for (int i = 0; i < kNumTrials; ++i) { @@ -283,9 +279,10 @@ TEST_F(StringRendererTest, DoesKeepAllImageBoxes) { if (!num_boxes_per_page) { num_boxes_per_page = renderer_->GetBoxes().size(); } else { - EXPECT_EQ((i+1) * num_boxes_per_page, renderer_->GetBoxes().size()); + EXPECT_EQ((i + 1) * num_boxes_per_page, renderer_->GetBoxes().size()); } - for (int j = i * num_boxes_per_page; j < (i+1) * num_boxes_per_page; ++j) { + for (int j = i * num_boxes_per_page; j < (i + 1) * num_boxes_per_page; + ++j) { EXPECT_EQ(i, renderer_->GetBoxes()[j]->page()); } } @@ -293,7 +290,7 @@ TEST_F(StringRendererTest, DoesKeepAllImageBoxes) { TEST_F(StringRendererTest, DoesClearBoxes) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); pixDestroy(&pix); @@ -310,7 +307,7 @@ TEST_F(StringRendererTest, DoesClearBoxes) { TEST_F(StringRendererTest, DoesLigatureTextForRendering) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); renderer_->set_add_ligatures(true); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kEngNonLigatureText), renderer_->RenderToImage(kEngNonLigatureText, strlen(kEngNonLigatureText), &pix)); @@ -323,7 +320,7 @@ TEST_F(StringRendererTest, DoesLigatureTextForRendering) { TEST_F(StringRendererTest, DoesRetainInputLigatureForRendering) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kEngLigatureText), renderer_->RenderToImage(kEngLigatureText, strlen(kEngLigatureText), &pix)); @@ -346,7 +343,7 @@ TEST_F(StringRendererTest, DoesStripUnrenderableWords) { TEST_F(StringRendererTest, DoesRenderWordBoxes) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); renderer_->set_output_word_boxes(true); - Pix *pix = NULL; + Pix* pix = NULL; EXPECT_EQ(strlen(kEngText), renderer_->RenderToImage(kEngText, strlen(kEngText), &pix)); pixDestroy(&pix); @@ -369,7 +366,7 @@ TEST_F(StringRendererTest, DoesRenderWordBoxes) { TEST_F(StringRendererTest, DoesRenderWordBoxesFromMultiLineText) { renderer_.reset(new StringRenderer("Verdana 10", 600, 600)); renderer_->set_output_word_boxes(true); - Pix *pix = NULL; + Pix* pix = NULL; const char kMultlineText[] = "the quick brown fox\njumps over the lazy dog"; EXPECT_EQ(strlen(kMultlineText), renderer_->RenderToImage(kMultlineText, strlen(kEngText), &pix)); @@ -398,9 +395,8 @@ TEST_F(StringRendererTest, DoesRenderAllFontsToImage) { do { Pix* pix = NULL; font_used.clear(); - offset += renderer_->RenderAllFontsToImage(1.0, kEngText + offset, - strlen(kEngText + offset), - &font_used, &pix); + offset += renderer_->RenderAllFontsToImage( + 1.0, kEngText + offset, strlen(kEngText + offset), &font_used, &pix); if (offset < strlen(kEngText)) { EXPECT_TRUE(pix != NULL); EXPECT_STRNE("", font_used.c_str()); @@ -432,7 +428,8 @@ TEST_F(StringRendererTest, DoesDropUncoveredChars) { const string kWord = "office"; const string kCleanWord = "oice"; Pix* pix = NULL; - EXPECT_FALSE(renderer_->font().CanRenderString(kWord.c_str(), kWord.length())); + EXPECT_FALSE( + renderer_->font().CanRenderString(kWord.c_str(), kWord.length())); EXPECT_FALSE(renderer_->font().CoversUTF8Text(kWord.c_str(), kWord.length())); int offset = renderer_->RenderToImage(kWord.c_str(), kWord.length(), &pix); pixDestroy(&pix); diff --git a/unittest/tablefind_test.cc b/unittest/tablefind_test.cc index 50dd50b303..dc4f7d701a 100644 --- a/unittest/tablefind_test.cc +++ b/unittest/tablefind_test.cc @@ -25,14 +25,14 @@ namespace { class TestableTableFinder : public tesseract::TableFinder { public: - using TableFinder::set_global_median_xheight; - using TableFinder::set_global_median_blob_width; - using TableFinder::set_global_median_ledding; using TableFinder::GapInXProjection; + using TableFinder::HasLeaderAdjacent; using TableFinder::InsertLeaderPartition; using TableFinder::InsertTextPartition; + using TableFinder::set_global_median_blob_width; + using TableFinder::set_global_median_ledding; + using TableFinder::set_global_median_xheight; using TableFinder::SplitAndInsertFragmentedTextPartition; - using TableFinder::HasLeaderAdjacent; void ExpectPartition(const TBOX& box) { tesseract::ColPartitionGridSearch gsearch(&fragmented_text_grid_); @@ -75,8 +75,7 @@ class TableFinderTest : public testing::Test { } void TearDown() { - if (partition_.get() != NULL) - partition_->DeleteBoxes(); + if (partition_.get() != NULL) partition_->DeleteBoxes(); DeletePartitionListBoxes(); finder_.reset(NULL); } @@ -87,12 +86,11 @@ class TableFinderTest : public testing::Test { void MakePartition(int x_min, int y_min, int x_max, int y_max, int first_column, int last_column) { - if (partition_.get() != NULL) - partition_->DeleteBoxes(); + if (partition_.get() != NULL) partition_->DeleteBoxes(); TBOX box; box.set_to_given_coords(x_min, y_min, x_max, y_max); - partition_.reset(ColPartition::FakePartition(box, PT_UNKNOWN, - BRT_UNKNOWN, BTFT_NONE)); + partition_.reset( + ColPartition::FakePartition(box, PT_UNKNOWN, BRT_UNKNOWN, BTFT_NONE)); partition_->set_first_column(first_column); partition_->set_last_column(last_column); } @@ -119,8 +117,7 @@ class TableFinderTest : public testing::Test { } void DeletePartitionListBoxes() { - for (free_boxes_it_.mark_cycle_pt(); - !free_boxes_it_.cycled_list(); + for (free_boxes_it_.mark_cycle_pt(); !free_boxes_it_.cycled_list(); free_boxes_it_.forward()) { ColPartition* part = free_boxes_it_.data(); part->DeleteBoxes(); @@ -137,30 +134,23 @@ class TableFinderTest : public testing::Test { TEST_F(TableFinderTest, GapInXProjectionNoGap) { int data[100]; - for (int i = 0; i < 100; ++i) - data[i] = 10; + for (int i = 0; i < 100; ++i) data[i] = 10; EXPECT_FALSE(finder_->GapInXProjection(data, 100)); } TEST_F(TableFinderTest, GapInXProjectionEdgeGap) { int data[100]; - for (int i = 0; i < 10; ++i) - data[i] = 2; - for (int i = 10; i < 90; ++i) - data[i] = 10; - for (int i = 90; i < 100; ++i) - data[i] = 2; + for (int i = 0; i < 10; ++i) data[i] = 2; + for (int i = 10; i < 90; ++i) data[i] = 10; + for (int i = 90; i < 100; ++i) data[i] = 2; EXPECT_FALSE(finder_->GapInXProjection(data, 100)); } TEST_F(TableFinderTest, GapInXProjectionExists) { int data[100]; - for (int i = 0; i < 10; ++i) - data[i] = 10; - for (int i = 10; i < 90; ++i) - data[i] = 2; - for (int i = 90; i < 100; ++i) - data[i] = 10; + for (int i = 0; i < 10; ++i) data[i] = 10; + for (int i = 10; i < 90; ++i) data[i] = 2; + for (int i = 90; i < 100; ++i) data[i] = 10; EXPECT_TRUE(finder_->GapInXProjection(data, 100)); } @@ -216,18 +206,18 @@ TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicPass) { all->set_right_margin(100); TBOX blob_box = part_box; for (int i = 10; i <= 20; i += 5) { - blob_box.set_left(i+1); - blob_box.set_right(i+4); + blob_box.set_left(i + 1); + blob_box.set_right(i + 4); all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box))); } for (int i = 35; i <= 55; i += 5) { - blob_box.set_left(i+1); - blob_box.set_right(i+4); + blob_box.set_left(i + 1); + blob_box.set_right(i + 4); all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box))); } for (int i = 80; i <= 95; i += 5) { - blob_box.set_left(i+1); - blob_box.set_right(i+4); + blob_box.set_left(i + 1); + blob_box.set_right(i + 4); all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box))); } // TODO(nbeato): Ray's newer code... @@ -256,8 +246,8 @@ TEST_F(TableFinderTest, SplitAndInsertFragmentedPartitionsBasicFail) { all->set_right_margin(100); TBOX blob_box = part_box; for (int i = 10; i <= 95; i += 5) { - blob_box.set_left(i+1); - blob_box.set_right(i+4); + blob_box.set_left(i + 1); + blob_box.set_right(i + 4); all->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(blob_box))); } // TODO(nbeato): Ray's newer code... diff --git a/unittest/tablerecog_test.cc b/unittest/tablerecog_test.cc index 411a1f86e2..55d608318d 100644 --- a/unittest/tablerecog_test.cc +++ b/unittest/tablerecog_test.cc @@ -34,8 +34,8 @@ class TestableTableRecognizer : public tesseract::TableRecognizer { class TestableStructuredTable : public tesseract::StructuredTable { public: - using StructuredTable::CountVerticalIntersections; using StructuredTable::CountHorizontalIntersections; + using StructuredTable::CountVerticalIntersections; using StructuredTable::FindLinedStructure; using StructuredTable::FindWhitespacedColumns; using StructuredTable::FindWhitespacedStructure; @@ -51,11 +51,11 @@ class TestableStructuredTable : public tesseract::StructuredTable { } void ExpectCellX(int x_min, int second, int add, int almost_done, int x_max) { - ASSERT_EQ(0, (almost_done - second) % add); + ASSERT_EQ(0, (almost_done - second) % add); EXPECT_EQ(3 + (almost_done - second) / add, cell_x_.length()); EXPECT_EQ(x_min, cell_x_.get(0)); EXPECT_EQ(x_max, cell_x_.get(cell_x_.length() - 1)); - for (int i = 1; i < cell_x_.length() - 1; ++i) { + for (int i = 1; i < cell_x_.length() - 1; ++i) { EXPECT_EQ(second + add * (i - 1), cell_x_.get(i)); } } @@ -63,7 +63,7 @@ class TestableStructuredTable : public tesseract::StructuredTable { void ExpectSortedX() { EXPECT_GT(cell_x_.length(), 0); for (int i = 1; i < cell_x_.length(); ++i) { - EXPECT_LT(cell_x_.get(i-1), cell_x_.get(i)); + EXPECT_LT(cell_x_.get(i - 1), cell_x_.get(i)); } } }; @@ -92,8 +92,8 @@ class SharedTest : public testing::Test { void InsertPartition(int left, int bottom, int right, int top) { TBOX box(left, bottom, right, top); - ColPartition* part = ColPartition::FakePartition(box, PT_FLOWING_TEXT, - BRT_TEXT, BTFT_NONE); + ColPartition* part = + ColPartition::FakePartition(box, PT_FLOWING_TEXT, BRT_TEXT, BTFT_NONE); part->set_median_width(3); part->set_median_height(3); text_grid_->InsertBBox(true, true, part); @@ -103,34 +103,30 @@ class SharedTest : public testing::Test { } void InsertLines() { - line_box_.set_to_given_coords(100 - line_grid_->gridsize(), - 10 - line_grid_->gridsize(), - 450 + line_grid_->gridsize(), - 50 + line_grid_->gridsize()); - for (int i = 10; i <= 50; i += 10) - InsertHorizontalLine(100, 450, i); - for (int i = 100; i <= 450; i += 50) - InsertVerticalLine(i, 10, 50); - - for (int i = 100; i <= 200; i += 20) - InsertHorizontalLine(0, 100, i); + line_box_.set_to_given_coords( + 100 - line_grid_->gridsize(), 10 - line_grid_->gridsize(), + 450 + line_grid_->gridsize(), 50 + line_grid_->gridsize()); + for (int i = 10; i <= 50; i += 10) InsertHorizontalLine(100, 450, i); + for (int i = 100; i <= 450; i += 50) InsertVerticalLine(i, 10, 50); + + for (int i = 100; i <= 200; i += 20) InsertHorizontalLine(0, 100, i); } void InsertHorizontalLine(int left, int right, int y) { - TBOX box(left, y - line_grid_->gridsize(), - right, y + line_grid_->gridsize()); - ColPartition* part = ColPartition::FakePartition(box, PT_HORZ_LINE, - BRT_HLINE, BTFT_NONE); + TBOX box(left, y - line_grid_->gridsize(), right, + y + line_grid_->gridsize()); + ColPartition* part = + ColPartition::FakePartition(box, PT_HORZ_LINE, BRT_HLINE, BTFT_NONE); line_grid_->InsertBBox(true, true, part); tesseract::ColPartition_IT add_it(&allocated_parts_); add_it.add_after_stay_put(part); } void InsertVerticalLine(int x, int bottom, int top) { - TBOX box(x - line_grid_->gridsize(), bottom, - x + line_grid_->gridsize(), top); - ColPartition* part = ColPartition::FakePartition(box, PT_VERT_LINE, - BRT_VLINE, BTFT_NONE); + TBOX box(x - line_grid_->gridsize(), bottom, x + line_grid_->gridsize(), + top); + ColPartition* part = + ColPartition::FakePartition(box, PT_VERT_LINE, BRT_VLINE, BTFT_NONE); line_grid_->InsertBBox(true, true, part); tesseract::ColPartition_IT add_it(&allocated_parts_); @@ -273,10 +269,8 @@ TEST_F(StructuredTableTest, CountHorizontalIntersectionsAll) { } TEST_F(StructuredTableTest, VerifyLinedTableBasicPass) { - for (int y = 10; y <= 50; y += 10) - table_->InjectCellY(y); - for (int x = 100; x <= 450; x += 50) - table_->InjectCellX(x); + for (int y = 10; y <= 50; y += 10) table_->InjectCellY(y); + for (int x = 100; x <= 450; x += 50) table_->InjectCellX(x); InsertLines(); InsertCellsInLines(); table_->set_bounding_box(line_box_); @@ -284,10 +278,8 @@ TEST_F(StructuredTableTest, VerifyLinedTableBasicPass) { } TEST_F(StructuredTableTest, VerifyLinedTableHorizontalFail) { - for (int y = 10; y <= 50; y += 10) - table_->InjectCellY(y); - for (int x = 100; x <= 450; x += 50) - table_->InjectCellX(x); + for (int y = 10; y <= 50; y += 10) table_->InjectCellY(y); + for (int x = 100; x <= 450; x += 50) table_->InjectCellX(x); InsertLines(); InsertCellsInLines(); InsertPartition(101, 11, 299, 19); @@ -296,10 +288,8 @@ TEST_F(StructuredTableTest, VerifyLinedTableHorizontalFail) { } TEST_F(StructuredTableTest, VerifyLinedTableVerticalFail) { - for (int y = 10; y <= 50; y += 10) - table_->InjectCellY(y); - for (int x = 100; x <= 450; x += 50) - table_->InjectCellX(x); + for (int y = 10; y <= 50; y += 10) table_->InjectCellY(y); + for (int x = 100; x <= 450; x += 50) table_->InjectCellX(x); InsertLines(); InsertCellsInLines(); InsertPartition(151, 21, 199, 39); diff --git a/unittest/tabvector_test.cc b/unittest/tabvector_test.cc index 4c97264bc7..d4f15fdda7 100644 --- a/unittest/tabvector_test.cc +++ b/unittest/tabvector_test.cc @@ -21,12 +21,9 @@ namespace { class TabVectorTest : public testing::Test { protected: - void SetUp() { - vector_.reset(); - } + void SetUp() { vector_.reset(); } - void TearDown() { - } + void TearDown() {} void MakeSimpleTabVector(int x1, int y1, int x2, int y2) { vector_.reset(new TabVector()); @@ -60,7 +57,7 @@ TEST_F(TabVectorTest, XAtY45DegreeSlopeInRangeExact) { } TEST_F(TabVectorTest, XAtYVerticalInRangeExact) { - const int x = 120; // Arbitrary choice + const int x = 120; // Arbitrary choice MakeSimpleTabVector(x, 0, x, 100); for (int y = 0; y <= 100; ++y) { int result_x = vector_->XAtY(y); @@ -69,7 +66,7 @@ TEST_F(TabVectorTest, XAtYVerticalInRangeExact) { } TEST_F(TabVectorTest, XAtYHorizontal) { - const int y = 76; // arbitrary + const int y = 76; // arbitrary MakeSimpleTabVector(0, y, 100, y); EXPECT_EQ(0, vector_->XAtY(y)); // TODO(nbeato): What's the failure condition? @@ -93,13 +90,13 @@ TEST_F(TabVectorTest, XAtYLargeNumbers) { // Assume a document is 800 DPI, // the width of a page is 10 inches across (8000 pixels), and // the height of the page is 15 inches (12000 pixels). - MakeSimpleTabVector(7804, 504, 7968, 11768); // Arbitrary for vertical line - int x = vector_->XAtY(6136); // test mid point + MakeSimpleTabVector(7804, 504, 7968, 11768); // Arbitrary for vertical line + int x = vector_->XAtY(6136); // test mid point EXPECT_EQ(7886, x); } TEST_F(TabVectorTest, XAtYHorizontalInRangeExact) { - const int y = 120; // Arbitrary choice + const int y = 120; // Arbitrary choice MakeSimpleTabVector(50, y, 150, y); int x = vector_->XAtY(y); @@ -129,4 +126,4 @@ TEST_F(TabVectorTest, XYFlip) { EXPECT_EQ(3, vector_->endpt().y()); } -} // namespace +} // namespace diff --git a/unittest/tatweel_test.cc b/unittest/tatweel_test.cc index 02cdc7aa93..98baf8a5d2 100644 --- a/unittest/tatweel_test.cc +++ b/unittest/tatweel_test.cc @@ -25,8 +25,7 @@ class TatweelTest : public ::testing::Test { } string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } UNICHARSET unicharset_; }; diff --git a/unittest/textlineprojection_test.cc b/unittest/textlineprojection_test.cc index e8ba82d3eb..a0eaa91bbc 100644 --- a/unittest/textlineprojection_test.cc +++ b/unittest/textlineprojection_test.cc @@ -20,17 +20,14 @@ using tesseract::TextlineProjection; // NOTE: Keep in sync with textlineprojection.cc. const int kMinStrongTextValue = 6; - // The fixture for testing Tesseract. class TextlineProjectionTest : public testing::Test { protected: string TestDataNameToPath(const string& name) { - return file::JoinPath(FLAGS_test_srcdir, - "testdata/" + name); + return file::JoinPath(FLAGS_test_srcdir, "testdata/" + name); } string TessdataPath() { - return file::JoinPath(FLAGS_test_srcdir, - "tessdata"); + return file::JoinPath(FLAGS_test_srcdir, "tessdata"); } string OutputNameToPath(const string& name) { return file::JoinPath(FLAGS_test_tmpdir, name); @@ -118,16 +115,15 @@ class TextlineProjectionTest : public testing::Test { const char* text, const char* message) { int value = projection_->EvaluateBox(box, denorm_, false); if (greater_or_equal != (value > target_value)) { - LOG(INFO) - << StringPrintf("EvaluateBox too %s:%d vs %d for %s word '%s' at:", - greater_or_equal ? "low" : "high", value, - target_value, - message, text); + LOG(INFO) << StringPrintf( + "EvaluateBox too %s:%d vs %d for %s word '%s' at:", + greater_or_equal ? "low" : "high", value, target_value, message, + text); box.print(); value = projection_->EvaluateBox(box, denorm_, true); } else { - VLOG(1) << StringPrintf("EvaluateBox OK(%d) for %s word '%s'", - value, message, text); + VLOG(1) << StringPrintf("EvaluateBox OK(%d) for %s word '%s'", value, + message, text); } if (greater_or_equal) { EXPECT_GE(value, target_value); @@ -139,12 +135,12 @@ class TextlineProjectionTest : public testing::Test { // Helper evaluates the DistanceOfBoxFromBox function by expecting that // box should be nearer to true_box than false_box. void EvaluateDistance(const TBOX& box, const TBOX& true_box, - const TBOX& false_box, - const char* text, const char* message) { - int true_dist = projection_->DistanceOfBoxFromBox(box, true_box, true, - denorm_, false); - int false_dist = projection_->DistanceOfBoxFromBox(box, false_box, true, - denorm_, false); + const TBOX& false_box, const char* text, + const char* message) { + int true_dist = + projection_->DistanceOfBoxFromBox(box, true_box, true, denorm_, false); + int false_dist = + projection_->DistanceOfBoxFromBox(box, false_box, true, denorm_, false); if (false_dist <= true_dist) { LOG(INFO) << StringPrintf("Distance wrong:%d vs %d for %s word '%s' at:", false_dist, true_dist, message, text); @@ -194,8 +190,7 @@ class TextlineProjectionTest : public testing::Test { TBOX lower_box = word_box; lower_box.set_top(word_box.bottom()); lower_box.set_bottom(word_box.bottom() - padding); - if (tall_word) - lower_box.move(ICOORD(0, padding / 2)); + if (tall_word) lower_box.move(ICOORD(0, padding / 2)); EvaluateBox(lower_box, false, kMinStrongTextValue, text, "Lower Word"); EvaluateBox(lower_box, true, -1, text, "Lower Word not vertical"); @@ -224,20 +219,19 @@ class TextlineProjectionTest : public testing::Test { TBOX upper_challenger(upper_box); upper_challenger.set_bottom(upper_box.top()); upper_challenger.set_top(upper_box.top() + word_box.height()); - EvaluateDistance(upper_box, target_box, upper_challenger, - text, "Upper Word"); - if (tall_word) - lower_box.move(ICOORD(0, padding / 2)); + EvaluateDistance(upper_box, target_box, upper_challenger, text, + "Upper Word"); + if (tall_word) lower_box.move(ICOORD(0, padding / 2)); lower_box.set_bottom(lower_box.top() - padding); target_box = word_box; target_box.set_bottom(lower_box.top()); TBOX lower_challenger(lower_box); lower_challenger.set_top(lower_box.bottom()); lower_challenger.set_bottom(lower_box.bottom() - word_box.height()); - EvaluateDistance(lower_box, target_box, lower_challenger, - text, "Lower Word"); + EvaluateDistance(lower_box, target_box, lower_challenger, text, + "Lower Word"); - delete [] text; + delete[] text; } while (it->Next(tesseract::RIL_WORD)); delete it; } @@ -254,13 +248,9 @@ class TextlineProjectionTest : public testing::Test { }; // Tests all word boxes on an unrotated image. -TEST_F(TextlineProjectionTest, Unrotated) { - VerifyBoxes("phototest.tif", 31); -} +TEST_F(TextlineProjectionTest, Unrotated) { VerifyBoxes("phototest.tif", 31); } // Tests character-level applyboxes on italic Times New Roman. -TEST_F(TextlineProjectionTest, Rotated) { - VerifyBoxes("phototestrot.tif", 31); -} +TEST_F(TextlineProjectionTest, Rotated) { VerifyBoxes("phototestrot.tif", 31); } } // namespace diff --git a/unittest/tfile_test.cc b/unittest/tfile_test.cc index 7c54a0c413..d7f08d3db3 100644 --- a/unittest/tfile_test.cc +++ b/unittest/tfile_test.cc @@ -23,8 +23,7 @@ namespace { class TfileTest : public ::testing::Test { protected: - TfileTest() { - } + TfileTest() {} // Some data to serialize. class MathData { @@ -32,11 +31,9 @@ class TfileTest : public ::testing::Test { MathData() : num_squares_(0), num_triangles_(0) {} void Setup() { // Setup some data. - for (int s = 0; s < 42; ++s) - squares_.push_back(s * s); + for (int s = 0; s < 42; ++s) squares_.push_back(s * s); num_squares_ = squares_.size(); - for (int t = 0; t < 52; ++t) - triangles_.push_back(t * (t + 1) / 2); + for (int t = 0; t < 52; ++t) triangles_.push_back(t * (t + 1) / 2); num_triangles_ = triangles_.size(); } void ExpectEq(const MathData& other) { @@ -52,7 +49,7 @@ class TfileTest : public ::testing::Test { if (fp->FWrite(&num_squares_, sizeof(num_squares_), 1) != 1) return false; if (!squares_.Serialize(fp)) return false; if (fp->FWrite(&num_triangles_, sizeof(num_triangles_), 1) != 1) - return false; + return false; if (!triangles_.Serialize(fp)) return false; return true; } diff --git a/unittest/unicharcompress_test.cc b/unittest/unicharcompress_test.cc index 4f97ca3b1e..0e14a32714 100644 --- a/unittest/unicharcompress_test.cc +++ b/unittest/unicharcompress_test.cc @@ -10,8 +10,8 @@ // limitations under the License. #include "unicharcompress.h" #include "gunit.h" -#include "serialis.h" #include "printf.h" +#include "serialis.h" namespace tesseract { namespace { @@ -21,11 +21,9 @@ class UnicharcompressTest : public ::testing::Test { // Loads and compresses the given unicharset. void LoadUnicharset(const string& unicharset_name) { string radical_stroke_file = - file::JoinPath(FLAGS_test_srcdir, - "langdata/radical-stroke.txt"); - string unicharset_file = file::JoinPath( - FLAGS_test_srcdir, "testdata", - unicharset_name); + file::JoinPath(FLAGS_test_srcdir, "langdata/radical-stroke.txt"); + string unicharset_file = + file::JoinPath(FLAGS_test_srcdir, "testdata", unicharset_name); string uni_data; CHECK_OK(file::GetContents(unicharset_file, &uni_data, file::Defaults())); string radical_data; diff --git a/unittest/unicharset_test.cc b/unittest/unicharset_test.cc index 2c3bb47c8f..8a5ad0b925 100644 --- a/unittest/unicharset_test.cc +++ b/unittest/unicharset_test.cc @@ -128,9 +128,8 @@ TEST(UnicharsetTest, MultibyteBigrams) { TEST(UnicharsetTest, OldStyle) { // This test verifies an old unicharset that contains fi/fl ligatures loads // and keeps all the entries. - string filename = file::JoinPath(FLAGS_test_srcdir, - "testdata", - "eng.unicharset"); + string filename = + file::JoinPath(FLAGS_test_srcdir, "testdata", "eng.unicharset"); UNICHARSET u; LOG(INFO) << "Filename=" << filename; EXPECT_TRUE(u.load_from_file(filename.c_str())); diff --git a/unittest/validator_test.cc b/unittest/validator_test.cc index dd9923867c..40c8413f99 100644 --- a/unittest/validator_test.cc +++ b/unittest/validator_test.cc @@ -11,8 +11,8 @@ #include "validator.h" +#include "gmock/gmock.h" // for testing::ElementsAreArray #include "include_gunit.h" -#include "gmock/gmock.h" // for testing::ElementsAreArray namespace tesseract { namespace {