From a18620cfea33d03032b71fe1b9fc424777e34252 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Fri, 8 Sep 2017 09:37:03 +0100 Subject: [PATCH] Improved results on images with no resolution. Estimates resolution from the size of the connected components, based on average text size. --- api/baseapi.cpp | 13 ++----------- ccmain/osdetect.cpp | 3 --- ccmain/pagesegmain.cpp | 15 ++++++++++++--- ccstruct/publictypes.h | 14 +++++++++++++- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index c473a90f9e..f2b42276e2 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -102,15 +102,6 @@ const char* kInputFile = "noname.tif"; const char* kOldVarsFile = "failed_vars.txt"; /** Max string length of an int. */ const int kMaxIntSize = 22; -/** - * Minimum believable resolution. Used as a default if there is no other - * information, as it is safer to under-estimate than over-estimate. - */ -const int kMinCredibleResolution = 70; -/** Maximum believable resolution. */ -const int kMaxCredibleResolution = 2400; -/** Default resolution. */ -const int kDefaultResolution = 300; /* Add all available languages recursively. */ @@ -2225,8 +2216,8 @@ bool TessBaseAPI::Threshold(Pix** pix) { // Use the minimum default resolution, as it is safer to under-estimate // than over-estimate resolution. tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res, - kDefaultResolution); - thresholder_->SetSourceYResolution(kDefaultResolution); + kMinCredibleResolution); + thresholder_->SetSourceYResolution(kMinCredibleResolution); } PageSegMode pageseg_mode = static_cast<PageSegMode>( diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp index 998b3bed3c..af4893797a 100644 --- a/ccmain/osdetect.cpp +++ b/ccmain/osdetect.cpp @@ -58,9 +58,6 @@ const char* ScriptDetector::korean_script_ = "Korean"; const char* ScriptDetector::japanese_script_ = "Japanese"; const char* ScriptDetector::fraktur_script_ = "Fraktur"; -// Minimum believable resolution. 
-const int kMinCredibleResolution = 70; - void OSResults::update_best_orientation() { float first = orientations[0]; float second = orientations[1]; diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp index d481dc662f..855f1f12e6 100644 --- a/ccmain/pagesegmain.cpp +++ b/ccmain/pagesegmain.cpp @@ -310,13 +310,22 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( TO_BLOCK* to_block = to_block_it.data(); TBOX blkbox = to_block->block->bounding_box(); ColumnFinder* finder = NULL; + int estimated_resolution = source_resolution_; + if (source_resolution_ == kMinCredibleResolution) { + // Try to estimate resolution from typical body text size. + int res = IntCastRounded(to_block->line_size * kResolutionEstimationFactor); + if (res > estimated_resolution && res < kMaxCredibleResolution) { + estimated_resolution = res; + tprintf("Estimating resolution as %d\n", estimated_resolution); + } + } if (to_block->line_size >= 2) { finder = new ColumnFinder(static_cast<int>(to_block->line_size), blkbox.botleft(), blkbox.topright(), - source_resolution_, textord_use_cjk_fp_model, - textord_tabfind_aligned_gap_fraction, - &v_lines, &h_lines, vertical_x, vertical_y); + estimated_resolution, textord_use_cjk_fp_model, + textord_tabfind_aligned_gap_fraction, &v_lines, + &h_lines, vertical_x, vertical_y); finder->SetupAndFilterNoise(pageseg_mode, *photo_mask_pix, to_block); diff --git a/ccstruct/publictypes.h b/ccstruct/publictypes.h index a3428658f3..f0c83ab0d3 100644 --- a/ccstruct/publictypes.h +++ b/ccstruct/publictypes.h @@ -30,7 +30,19 @@ // API-level code should include apitypes.h in preference to this file. /** Number of printers' points in an inch. The unit of the pointsize return. */ -const int kPointsPerInch = 72; +constexpr int kPointsPerInch = 72; +/** + * Minimum believable resolution. Used as a default if there is no other + * information, as it is safer to under-estimate than over-estimate. 
+ */ +constexpr int kMinCredibleResolution = 70; +/** Maximum believable resolution. */ +constexpr int kMaxCredibleResolution = 2400; +/** + * Ratio between median blob size and likely resolution. Used to estimate + * resolution when none is provided. This is basically 1/usual text size in + * inches. */ +constexpr int kResolutionEstimationFactor = 10; /** * Possible types for a POLY_BLOCK or ColPartition.