From a1c22fb0d0f6bde165ec7b7c3125420b0ba1d541 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Wed, 25 Jan 2017 16:05:59 -0800 Subject: [PATCH] Fixed issue #557 --- api/tesseractmain.cpp | 2 +- ccmain/osdetect.cpp | 2 +- ccmain/pagesegmain.cpp | 52 +++++--------------- ccstruct/Makefile.am | 2 +- ccstruct/debugpixa.h | 52 ++++++++++++++++++++ classify/trainingsampleset.cpp | 81 ------------------------------- classify/trainingsampleset.h | 4 -- textord/alignedblob.cpp | 20 -------- textord/alignedblob.h | 18 ------- textord/baselinedetect.cpp | 21 +------- textord/baselinedetect.h | 7 --- textord/colfind.cpp | 25 +++------- textord/colfind.h | 7 +-- textord/colpartitiongrid.cpp | 40 --------------- textord/colpartitiongrid.h | 5 -- textord/devanagari_processing.cpp | 18 +++---- textord/devanagari_processing.h | 7 ++- textord/imagefind.cpp | 51 ++++++++++--------- textord/imagefind.h | 15 +++--- textord/tabfind.cpp | 35 +------------ textord/tablefind.cpp | 78 ----------------------------- textord/tablefind.h | 5 -- textord/textlineprojection.cpp | 7 +-- 23 files changed, 131 insertions(+), 423 deletions(-) create mode 100644 ccstruct/debugpixa.h diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 7cd6ff8427..bdaa6f8046 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -403,7 +403,7 @@ int main(int argc, char** argv) { #if !defined(DEBUG) // Disable debugging and informational messages from Leptonica. - setMsgSeverity(L_SEVERITY_WARNING); + setMsgSeverity(L_SEVERITY_ERROR); #endif #if defined(HAVE_TIFFIO_H) && defined(_WIN32) diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp index 69e626222f..998b3bed3c 100644 --- a/ccmain/osdetect.cpp +++ b/ccmain/osdetect.cpp @@ -176,7 +176,7 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks, tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y, NULL, &v_lines, &h_lines); - Pix* im_pix = tesseract::ImageFind::FindImages(pix); + Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr); if (im_pix != NULL) { pixSubtract(pix, pix, im_pix); pixDestroy(&im_pix); diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp index d815365137..d481dc662f 100644 --- a/ccmain/pagesegmain.cpp +++ b/ccmain/pagesegmain.cpp @@ -37,6 +37,7 @@ #include "blobbox.h" #include "blread.h" #include "colfind.h" +#include "debugpixa.h" #include "equationdetect.h" #include "imagefind.h" #include "linefind.h" @@ -176,28 +177,6 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks, return auto_page_seg_ret_val; } -// Helper writes a grey image to a file for use by scrollviewer. -// Normally for speed we don't display the image in the layout debug windows. -// If textord_debug_images is true, we draw the image as a background to some -// of the debug windows. printable determines whether these -// images are optimized for printing instead of screen display. -static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) { - Pix* grey_pix = pixCreate(pixGetWidth(pix_binary), - pixGetHeight(pix_binary), 8); - // Printable images are light grey on white, but for screen display - // they are black on dark grey so the other colors show up well. - if (printable) { - pixSetAll(grey_pix); - pixSetMasked(grey_pix, pix_binary, 192); - } else { - pixSetAllArbitrary(grey_pix, 64); - pixSetMasked(grey_pix, pix_binary, 0); - } - AlignedBlob::IncrementDebugPix(); - pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG); - pixDestroy(&grey_pix); -} - /** * Auto page segmentation. Divide the page image into blocks of uniform * text linespacing and images. @@ -226,9 +205,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks, BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess, OSResults* osr) { - if (textord_debug_images) { - WriteDebugBackgroundImage(textord_debug_printable, pix_binary_); - } Pix* photomask_pix = NULL; Pix* musicmask_pix = NULL; // The blocks made by the ColumnFinder. Moved to blocks before return. @@ -250,9 +226,10 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, if (equ_detect_) { finder->SetEquationDetect(equ_detect_); } - result = finder->FindBlocks( - pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix, - pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks); + result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_, + to_block, photomask_pix, pix_thresholds_, + pix_grey_, &pixa_debug_, &found_blocks, + diacritic_blobs, to_blocks); if (result >= 0) finder->GetDeskewVectors(&deskew_, &reskew_); delete finder; @@ -265,11 +242,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks, BLOCK_IT block_it(blocks); // Move the found blocks to the input/output blocks. block_it.add_list_after(&found_blocks); - - if (textord_debug_images) { - // The debug image is no longer needed so delete it. - unlink(AlignedBlob::textord_debug_pix().string()); - } return result; } @@ -311,19 +283,21 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( ASSERT_HOST(pix_binary_ != NULL); if (tessedit_dump_pageseg_images) { - pixWrite("tessinput.png", pix_binary_, IFF_PNG); + pixa_debug_.AddPix(pix_binary_, "PageSegInput"); } // Leptonica is used to find the rule/separator lines in the input. LineFinder::FindAndRemoveLines(source_resolution_, textord_tabfind_show_vlines, pix_binary_, &vertical_x, &vertical_y, music_mask_pix, &v_lines, &h_lines); - if (tessedit_dump_pageseg_images) - pixWrite("tessnolines.png", pix_binary_, IFF_PNG); + if (tessedit_dump_pageseg_images) { + pixa_debug_.AddPix(pix_binary_, "NoLines"); + } // Leptonica is used to find a mask of the photo regions in the input. - *photo_mask_pix = ImageFind::FindImages(pix_binary_); - if (tessedit_dump_pageseg_images) - pixWrite("tessnoimages.png", pix_binary_, IFF_PNG); + *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_); + if (tessedit_dump_pageseg_images) { + pixa_debug_.AddPix(pix_binary_, "NoImages"); + } if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear(); // The rest of the algorithm uses the usual connected components. diff --git a/ccstruct/Makefile.am b/ccstruct/Makefile.am index 8a26a684d6..2f9dc03679 100644 --- a/ccstruct/Makefile.am +++ b/ccstruct/Makefile.am @@ -12,7 +12,7 @@ endif include_HEADERS = publictypes.h noinst_HEADERS = \ blamer.h blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \ - detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \ + debugpixa.h detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \ imagedata.h \ ipoints.h \ linlsq.h matrix.h mod128.h normalis.h \ diff --git a/ccstruct/debugpixa.h b/ccstruct/debugpixa.h new file mode 100644 index 0000000000..37e6321563 --- /dev/null +++ b/ccstruct/debugpixa.h @@ -0,0 +1,52 @@ +#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_ +#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_ + +#include "allheaders.h" + +namespace tesseract { + +// Class to hold a Pixa collection of debug images with captions and save them +// to a PDF file. +class DebugPixa { + public: + // TODO(rays) add another constructor with size control. + DebugPixa() { + pixa_ = pixaCreate(0); + fonts_ = bmfCreate(nullptr, 14); + } + // If the filename_ has been set and there are any debug images, they are + // written to the set filename_. + ~DebugPixa() { + pixaDestroy(&pixa_); + bmfDestroy(&fonts_); + } + + // Adds the given pix to the set of pages in the PDF file, with the given + // caption added to the top. + void AddPix(const Pix* pix, const char* caption) { + int depth = pixGetDepth(const_cast(pix)); + int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80); + Pix* pix_debug = pixAddSingleTextblock( + const_cast(pix), fonts_, caption, color, L_ADD_BELOW, nullptr); + pixaAddPix(pixa_, pix_debug, L_INSERT); + } + + // Sets the destination filename and enables images to be written to a PDF + // on destruction. + void WritePDF(const char* filename) { + if (pixaGetCount(pixa_) > 0) { + pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename); + pixaClear(pixa_); + } + } + + private: + // The collection of images to put in the PDF. + Pixa* pixa_; + // The fonts used to draw text captions. + L_Bmf* fonts_; +}; + +} // namespace tesseract + +#endif // TESSERACT_CCSTRUCT_DEBUGPIXA_H_ diff --git a/classify/trainingsampleset.cpp b/classify/trainingsampleset.cpp index 93936fcae6..3d73825664 100644 --- a/classify/trainingsampleset.cpp +++ b/classify/trainingsampleset.cpp @@ -487,81 +487,6 @@ void TrainingSampleSet::IndexFeatures(const IntFeatureSpace& feature_space) { samples_[s]->IndexFeatures(feature_space); } -// Delete outlier samples with few features that are shared with others. -// IndexFeatures must have been called already. -void TrainingSampleSet::DeleteOutliers(const IntFeatureSpace& feature_space, - bool debug) { - if (font_class_array_ == NULL) - OrganizeByFontAndClass(); - Pixa* pixa = NULL; - if (debug) - pixa = pixaCreate(0); - GenericVector feature_counts; - int fs_size = feature_space.Size(); - int font_size = font_id_map_.CompactSize(); - for (int font_index = 0; font_index < font_size; ++font_index) { - for (int c = 0; c < unicharset_size_; ++c) { - // Create a histogram of the features used by all samples of this - // font/class combination. - feature_counts.init_to_size(fs_size, 0); - FontClassInfo& fcinfo = (*font_class_array_)(font_index, c); - int sample_count = fcinfo.samples.size(); - if (sample_count < kMinOutlierSamples) - continue; - for (int i = 0; i < sample_count; ++i) { - int s = fcinfo.samples[i]; - const GenericVector& features = samples_[s]->indexed_features(); - for (int f = 0; f < features.size(); ++f) { - ++feature_counts[features[f]]; - } - } - for (int i = 0; i < sample_count; ++i) { - int s = fcinfo.samples[i]; - const TrainingSample& sample = *samples_[s]; - const GenericVector& features = sample.indexed_features(); - // A feature that has a histogram count of 1 is only used by this - // sample, making it 'bad'. All others are 'good'. - int good_features = 0; - int bad_features = 0; - for (int f = 0; f < features.size(); ++f) { - if (feature_counts[features[f]] > 1) - ++good_features; - else - ++bad_features; - } - // If more than 1/3 features are bad, then this is an outlier. - if (bad_features * 2 > good_features) { - tprintf("Deleting outlier sample of %s, %d good, %d bad\n", - SampleToString(sample).string(), - good_features, bad_features); - if (debug) { - pixaAddPix(pixa, sample.RenderToPix(&unicharset_), L_INSERT); - // Add the previous sample as well, so it is easier to see in - // the output what is wrong with this sample. - int t; - if (i == 0) - t = fcinfo.samples[1]; - else - t = fcinfo.samples[i - 1]; - const TrainingSample &csample = *samples_[t]; - pixaAddPix(pixa, csample.RenderToPix(&unicharset_), L_INSERT); - } - // Mark the sample for deletion. - KillSample(samples_[s]); - } - } - } - } - // Truly delete all bad samples and renumber everything. - DeleteDeadSamples(); - if (pixa != NULL) { - Pix* pix = pixaDisplayTiledInRows(pixa, 1, 2600, 1.0, 0, 10, 10); - pixaDestroy(&pixa); - pixWrite("outliers.png", pix, IFF_PNG); - pixDestroy(&pix); - } -} - // Marks the given sample index for deletion. // Deletion is actually completed by DeleteDeadSamples. void TrainingSampleSet::KillSample(TrainingSample* sample) { @@ -745,12 +670,6 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map, if (debug) { tprintf("Global worst dist = %g, between sample %d and %d\n", global_worst_dist, worst_s1, worst_s2); - Pix* pix1 = DebugSample(unicharset_, samples_[worst_s1]); - Pix* pix2 = DebugSample(unicharset_, samples_[worst_s2]); - pixOr(pix1, pix1, pix2); - pixWrite("worstpair.png", pix1, IFF_PNG); - pixDestroy(&pix1); - pixDestroy(&pix2); } } diff --git a/classify/trainingsampleset.h b/classify/trainingsampleset.h index e7010523d8..f48ad5ff5c 100644 --- a/classify/trainingsampleset.h +++ b/classify/trainingsampleset.h @@ -171,10 +171,6 @@ class TrainingSampleSet { // Generates indexed features for all samples with the supplied feature_space. void IndexFeatures(const IntFeatureSpace& feature_space); - // Delete outlier samples with few features that are shared with others. - // IndexFeatures must have been called already. - void DeleteOutliers(const IntFeatureSpace& feature_space, bool debug); - // Marks the given sample for deletion. // Deletion is actually completed by DeleteDeadSamples. void KillSample(TrainingSample* sample); diff --git a/textord/alignedblob.cpp b/textord/alignedblob.cpp index 0dab26bf8d..1aee958fce 100644 --- a/textord/alignedblob.cpp +++ b/textord/alignedblob.cpp @@ -30,7 +30,6 @@ INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle"); INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle"); INT_VAR(textord_testregion_right, MAX_INT32, "Right edge of debug rectangle"); INT_VAR(textord_testregion_bottom, MAX_INT32, "Bottom edge of debug rectangle"); -BOOL_VAR(textord_debug_images, false, "Use greyed image background for debug"); BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); namespace tesseract { @@ -64,25 +63,6 @@ const double kMinTabGradient = 4.0; // If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. const int kMaxSkewFactor = 15; -// Constant part of textord_debug_pix_. -const char* kTextordDebugPix = "psdebug_pix"; - -// Name of image file to use if textord_debug_images is true. -STRING AlignedBlob::textord_debug_pix_ = kTextordDebugPix; -// Index to image file to use if textord_debug_images is true. -int AlignedBlob::debug_pix_index_ = 0; - -// Increment the serial number counter and set the string to use -// for a filename if textord_debug_images is true. -void AlignedBlob::IncrementDebugPix() { - ++debug_pix_index_; - textord_debug_pix_ = kTextordDebugPix; - char numbuf[32]; - snprintf(numbuf, sizeof(numbuf), "%d", debug_pix_index_); - textord_debug_pix_ += numbuf; - textord_debug_pix_ += ".pix"; -} - // Constructor to set the parameters for finding aligned and ragged tabs. // Vertical_x and vertical_y are the current estimates of the true vertical // direction (up) in the image. Height is the height of the starter blob. diff --git a/textord/alignedblob.h b/textord/alignedblob.h index d5e9d26a2c..fc770e7812 100644 --- a/textord/alignedblob.h +++ b/textord/alignedblob.h @@ -29,8 +29,6 @@ extern INT_VAR_H(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding"); -extern BOOL_VAR_H(textord_debug_images, false, - "Use greyed image background for debug"); extern BOOL_VAR_H(textord_debug_printable, false, "Make debug windows printable"); @@ -102,17 +100,6 @@ class AlignedBlob : public BlobGrid { BLOBNBOX* bbox, int* vertical_x, int* vertical_y); - // Increment the serial number counter and set the string to use - // for a filename if textord_debug_images is true. - static void IncrementDebugPix(); - - // Return the string to use for a filename if textord_debug_images is true. - // Use IncrementDebugPix first to set the filename, and each time is - // to be incremented. - static const STRING& textord_debug_pix() { - return textord_debug_pix_; - } - private: // Find a set of blobs that are aligned in the given vertical // direction with the given blob. Returns a list of aligned @@ -132,11 +119,6 @@ class AlignedBlob : public BlobGrid { BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p, bool top_to_bottom, BLOBNBOX* bbox, int x_start, int* end_y); - - // Name of image file to use if textord_debug_images is true. - static STRING textord_debug_pix_; - // Index to image file to use if textord_debug_images is true. - static int debug_pix_index_; }; } // namespace tesseract. diff --git a/textord/baselinedetect.cpp b/textord/baselinedetect.cpp index 9bbd999e15..d90f2b1a31 100644 --- a/textord/baselinedetect.cpp +++ b/textord/baselinedetect.cpp @@ -782,11 +782,9 @@ double BaselineBlock::FitLineSpacingModel( return rms_error; } - BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew, TO_BLOCK_LIST* blocks) - : page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL), - debug_file_prefix_("") { + : page_skew_(page_skew), debug_level_(debug_level) { TO_BLOCK_IT it(blocks); for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { TO_BLOCK* to_block = it.data(); @@ -804,7 +802,6 @@ BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew, } BaselineDetect::~BaselineDetect() { - pixDestroy(&pix_debug_); } // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers @@ -847,31 +844,15 @@ void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, bool remove_noise, bool show_final_rows, Textord* textord) { - Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL; for (int i = 0; i < blocks_.size(); ++i) { BaselineBlock* bl_block = blocks_[i]; if (enable_splines) bl_block->PrepareForSplineFitting(page_tr, remove_noise); bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord); - if (pix_spline) { - bl_block->DrawPixSpline(pix_spline); - } if (show_final_rows) { bl_block->DrawFinalRows(page_tr); } } - - if (pix_spline) { - STRING outfile_name = debug_file_prefix_ + "_spline.png"; - pixWrite(outfile_name.string(), pix_spline, IFF_PNG); - pixDestroy(&pix_spline); - } -} - -void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) { - pixDestroy(&pix_debug_); - pix_debug_ = pixClone(pixIn); - debug_file_prefix_ = output_path; } } // namespace tesseract. diff --git a/textord/baselinedetect.h b/textord/baselinedetect.h index 7a47931fc2..8d11bdffe0 100644 --- a/textord/baselinedetect.h +++ b/textord/baselinedetect.h @@ -262,10 +262,6 @@ class BaselineDetect { bool show_final_rows, Textord* textord); - // Set up the image and filename, so that a debug image with the detected - // baseline rendered will be saved. - void SetDebugImage(Pix* pixIn, const STRING& output_path); - private: // Average (median) skew of the blocks on the page among those that have // a good angle of their own. @@ -274,9 +270,6 @@ class BaselineDetect { int debug_level_; // The blocks that we are working with. PointerVector blocks_; - - Pix* pix_debug_; - STRING debug_file_prefix_; }; } // namespace tesseract diff --git a/textord/colfind.cpp b/textord/colfind.cpp index ae48e9ffa5..44e2724246 100644 --- a/textord/colfind.cpp +++ b/textord/colfind.cpp @@ -290,8 +290,8 @@ void ColumnFinder::CorrectOrientation(TO_BLOCK* block, int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor, TO_BLOCK* input_block, Pix* photo_mask_pix, Pix* thresholds_pix, - Pix* grey_pix, BLOCK_LIST* blocks, - BLOBNBOX_LIST* diacritic_blobs, + Pix* grey_pix, DebugPixa* pixa_debug, + BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks) { pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); stroke_width_->FindLeaderPartitions(input_block, &part_grid_); @@ -304,11 +304,13 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, &projection_, diacritic_blobs, &part_grid_, &big_parts_); if (!PSM_SPARSE(pageseg_mode)) { ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, - input_block, this, &part_grid_, &big_parts_); + input_block, this, pixa_debug, &part_grid_, + &big_parts_); ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, photo_mask_pix); ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, - input_block, this, &part_grid_, &big_parts_); + input_block, this, pixa_debug, &part_grid_, + &big_parts_); } part_grid_.ReTypeBlobs(&image_bblobs_); TidyBlobs(input_block); @@ -441,9 +443,6 @@ int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, if (textord_tabfind_show_partitions) { ScrollView* window = MakeWindow(400, 300, "Partitions"); if (window != NULL) { - if (textord_debug_images) - window->Image(AlignedBlob::textord_debug_pix().string(), - image_origin().x(), image_origin().y()); part_grid_.DisplayBoxes(window); if (!textord_debug_printable) DisplayTabVectors(window); @@ -519,11 +518,7 @@ void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { blocks_win_ = MakeWindow(700, 300, "Blocks"); else blocks_win_->Clear(); - if (textord_debug_images) - blocks_win_->Image(AlignedBlob::textord_debug_pix().string(), - image_origin().x(), image_origin().y()); - else - DisplayBoxes(blocks_win_); + DisplayBoxes(blocks_win_); BLOCK_IT block_it(blocks); int serial = 1; for (block_it.mark_cycle_pt(); !block_it.cycled_list(); @@ -543,11 +538,7 @@ void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { #ifndef GRAPHICS_DISABLED ScrollView* col_win = MakeWindow(50, 300, "Columns"); - if (textord_debug_images) - col_win->Image(AlignedBlob::textord_debug_pix().string(), - image_origin().x(), image_origin().y()); - else - DisplayBoxes(col_win); + DisplayBoxes(col_win); col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); for (int i = 0; i < gridheight_; ++i) { ColPartitionSet* columns = best_columns_[i]; diff --git a/textord/colfind.h b/textord/colfind.h index 60f7f00ec6..1918f41bd0 100644 --- a/textord/colfind.h +++ b/textord/colfind.h @@ -20,11 +20,12 @@ #ifndef TESSERACT_TEXTORD_COLFIND_H_ #define TESSERACT_TEXTORD_COLFIND_H_ -#include "tabfind.h" -#include "imagefind.h" #include "colpartitiongrid.h" #include "colpartitionset.h" +#include "debugpixa.h" +#include "imagefind.h" #include "ocrblock.h" +#include "tabfind.h" #include "textlineprojection.h" class BLOCK_LIST; @@ -163,7 +164,7 @@ class ColumnFinder : public TabFind { // in debug mode, which requests a retry with more debug info. int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor, TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix, - Pix* grey_pix, BLOCK_LIST* blocks, + Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks); // Get the rotation required to deskew, and its inverse rotation. diff --git a/textord/colpartitiongrid.cpp b/textord/colpartitiongrid.cpp index 86c883280e..901884e7a2 100644 --- a/textord/colpartitiongrid.cpp +++ b/textord/colpartitiongrid.cpp @@ -645,46 +645,6 @@ bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, return any_changed; } -// Compute the mean RGB of the light and dark pixels in each ColPartition -// and also the rms error in the linearity of color. -void ColPartitionGrid::ComputePartitionColors(Pix* scaled_color, - int scaled_factor, - const FCOORD& rerotation) { - if (scaled_color == NULL) - return; - Pix* color_map1 = NULL; - Pix* color_map2 = NULL; - Pix* rms_map = NULL; - if (textord_tabfind_show_color_fit) { - int width = pixGetWidth(scaled_color); - int height = pixGetHeight(scaled_color); - color_map1 = pixCreate(width, height, 32); - color_map2 = pixCreate(width, height, 32); - rms_map = pixCreate(width, height, 8); - } - // Iterate the ColPartitions in the grid. - ColPartitionGridSearch gsearch(this); - gsearch.StartFullSearch(); - ColPartition* part; - while ((part = gsearch.NextFullSearch()) != NULL) { - TBOX part_box = part->bounding_box(); - part_box.rotate_large(rerotation); - ImageFind::ComputeRectangleColors(part_box, scaled_color, - scaled_factor, - color_map1, color_map2, rms_map, - part->color1(), part->color2()); - } - if (color_map1 != NULL) { - pixWrite("swcolorinput.png", scaled_color, IFF_PNG); - pixWrite("swcolor1.png", color_map1, IFF_PNG); - pixWrite("swcolor2.png", color_map2, IFF_PNG); - pixWrite("swrms.png", rms_map, IFF_PNG); - pixDestroy(&color_map1); - pixDestroy(&color_map2); - pixDestroy(&rms_map); - } -} - // Reflects the grid and its colpartitions in the y-axis, assuming that // all blob boxes have already been done. void ColPartitionGrid::ReflectInYAxis() { diff --git a/textord/colpartitiongrid.h b/textord/colpartitiongrid.h index a8926e4fcb..1e5b756e10 100644 --- a/textord/colpartitiongrid.h +++ b/textord/colpartitiongrid.h @@ -106,11 +106,6 @@ class ColPartitionGrid : public BBGridAddPix(debug_image_, + split_for_pageseg ? "pageseg_split" : "ocr_split"); } return true; } diff --git a/textord/devanagari_processing.h b/textord/devanagari_processing.h index 0d070decd0..f95ef25a4e 100644 --- a/textord/devanagari_processing.h +++ b/textord/devanagari_processing.h @@ -13,6 +13,8 @@ #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ +#include "allheaders.h" +#include "debugpixa.h" #include "ocrblock.h" #include "params.h" @@ -84,7 +86,7 @@ class ShiroRekhaSplitter { // Returns true if a split was actually performed. // If split_for_pageseg is true, the pageseg_split_strategy_ is used for // splitting. If false, the ocr_split_strategy_ is used. - bool Split(bool split_for_pageseg); + bool Split(bool split_for_pageseg, DebugPixa* pixa_debug); // Clears the memory held by this object. void Clear(); @@ -152,9 +154,6 @@ class ShiroRekhaSplitter { return segmentation_block_list_; } - // This method dumps a debug image to the specified location. - void DumpDebugImage(const char* filename) const; - // This method returns the computed mode-height of blobs in the pix. // It also prunes very small blobs from calculation. Could be used to provide // a global xheight estimate for images which have the same point-size text. diff --git a/textord/imagefind.cpp b/textord/imagefind.cpp index 6f0714181e..ad730bcc2b 100644 --- a/textord/imagefind.cpp +++ b/textord/imagefind.cpp @@ -62,7 +62,8 @@ const int kNoisePadding = 4; // the image regions as a mask image. // The returned pix may be NULL, meaning no images found. // If not NULL, it must be PixDestroyed by the caller. -Pix* ImageFind::FindImages(Pix* pix) { +// If textord_tabfind_show_images, debug images are appended to pixa_debug. +Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) { // Not worth looking at small images. if (pixGetWidth(pix) < kMinImageFindSize || pixGetHeight(pix) < kMinImageFindSize) @@ -70,7 +71,8 @@ Pix* ImageFind::FindImages(Pix* pix) { // Reduce by factor 2. Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); - pixDisplayWrite(pixr, textord_tabfind_show_images); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixr, "CascadeReduced"); // Get the halftone mask directly from Leptonica. // @@ -93,7 +95,8 @@ Pix* ImageFind::FindImages(Pix* pix) { // Expand back up again. Pix *pixht = pixExpandReplicate(pixht2, 2); - pixDisplayWrite(pixht, textord_tabfind_show_images); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixht, "HalftoneReplicated"); pixDestroy(&pixht2); // Fill to capture pixels near the mask edges that were missed @@ -104,14 +107,16 @@ Pix* ImageFind::FindImages(Pix* pix) { // Eliminate lines and bars that may be joined to images. Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); pixDilateBrick(pixfinemask, pixfinemask, 5, 5); - pixDisplayWrite(pixfinemask, textord_tabfind_show_images); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixfinemask, "FineMask"); Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); pixDestroy(&pixreduced); pixDilateBrick(pixreduced2, pixreduced2, 5, 5); Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8); pixDestroy(&pixreduced2); - pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixcoarsemask, "CoarseMask"); // Combine the coarse and fine image masks. pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); pixDestroy(&pixfinemask); @@ -119,13 +124,13 @@ Pix* ImageFind::FindImages(Pix* pix) { pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16); pixDestroy(&pixcoarsemask); - if (textord_tabfind_show_images) - pixWrite("junkexpandedcoarsemask.png", pixmask, IFF_PNG); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixmask, "MaskDilated"); // And the image mask with the line and bar remover. pixAnd(pixht, pixht, pixmask); pixDestroy(&pixmask); - if (textord_tabfind_show_images) - pixWrite("junkfinalimagemask.png", pixht, IFF_PNG); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixht, "FinalMask"); // Make the result image the same size as the input. Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); pixOr(result, result, pixht); @@ -140,12 +145,13 @@ Pix* ImageFind::FindImages(Pix* pix) { // If not NULL, they must be destroyed by the caller. // Resolution of pix should match the source image (Tesseract::pix_binary_) // so the output coordinate systems match. -void ImageFind::ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa) { +void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, + Boxa** boxa, Pixa** pixa) { *boxa = NULL; *pixa = NULL; - if (textord_tabfind_show_images) - pixWrite("junkconncompimage.png", pix, IFF_PNG); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pix, "Conncompimage"); // Find the individual image regions in the mask image. *boxa = pixConnComp(pix, pixa, 8); // Rectangularize the individual images. If a sharp edge in vertical and/or @@ -156,7 +162,8 @@ void ImageFind::ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa) { for (int i = 0; i < npixes; ++i) { int x_start, x_end, y_start, y_end; Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE); - pixDisplayWrite(img_pix, textord_tabfind_show_images); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(img_pix, "A component"); if (pixNearlyRectangular(img_pix, kMinRectangularFraction, kMaxRectangularFraction, kMaxRectangularGradient, @@ -1282,17 +1289,15 @@ static void DeleteSmallImages(ColPartitionGrid* part_grid) { // Since the other blobs in the other partitions will be owned by the block, // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this // situation and collect the image blobs. -void ImageFind::FindImagePartitions(Pix* image_pix, - const FCOORD& rotation, - const FCOORD& rerotation, - TO_BLOCK* block, - TabFind* tab_grid, - ColPartitionGrid* part_grid, - ColPartition_LIST* big_parts) { +void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation, + const FCOORD& rerotation, TO_BLOCK* block, + TabFind* tab_grid, DebugPixa* pixa_debug, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts) { int imageheight = pixGetHeight(image_pix); Boxa* boxa; Pixa* pixa; - ConnCompAndRectangularize(image_pix, &boxa, &pixa); + ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa); // Iterate the connected components in the image regions mask. int nboxes = 0; if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa); @@ -1307,8 +1312,8 @@ void ImageFind::FindImagePartitions(Pix* image_pix, ColPartition_LIST part_list; DivideImageIntoParts(im_box, rotation, rerotation, pix, &rectsearch, &part_list); - if (textord_tabfind_show_images) { - pixWrite("junkimagecomponent.png", pix, IFF_PNG); + if (textord_tabfind_show_images && pixa_debug != nullptr) { + pixa_debug->AddPix(pix, "ImageComponent"); tprintf("Component has %d parts\n", part_list.length()); } pixDestroy(&pix); diff --git a/textord/imagefind.h b/textord/imagefind.h index 407225cdc3..2f1deabccc 100644 --- a/textord/imagefind.h +++ b/textord/imagefind.h @@ -21,6 +21,7 @@ #ifndef TESSERACT_TEXTORD_IMAGEFIND_H_ #define TESSERACT_TEXTORD_IMAGEFIND_H_ +#include "debugpixa.h" #include "host.h" struct Boxa; @@ -45,7 +46,8 @@ class ImageFind { // the image regions as a mask image. // The returned pix may be NULL, meaning no images found. // If not NULL, it must be PixDestroyed by the caller. - static Pix* FindImages(Pix* pix); + // If textord_tabfind_show_images, debug images are appended to pixa_debug. + static Pix* FindImages(Pix* pix, DebugPixa* pixa_debug); // Generates a Boxa, Pixa pair from the input binary (image mask) pix, // analgous to pixConnComp, except that connected components which are nearly @@ -54,7 +56,8 @@ class ImageFind { // If not NULL, they must be destroyed by the caller. // Resolution of pix should match the source image (Tesseract::pix_binary_) // so the output coordinate systems match. - static void ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa); + static void ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, + Boxa** boxa, Pixa** pixa); // Returns true if there is a rectangle in the source pix, such that all // pixel rows and column slices outside of it have less than @@ -144,11 +147,9 @@ class ImageFind { // Since the other blobs in the other partitions will be owned by the block, // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this // situation and collect the image blobs. - static void FindImagePartitions(Pix* image_pix, - const FCOORD& rotation, - const FCOORD& rerotation, - TO_BLOCK* block, - TabFind* tab_grid, + static void FindImagePartitions(Pix* image_pix, const FCOORD& rotation, + const FCOORD& rerotation, TO_BLOCK* block, + TabFind* tab_grid, DebugPixa* pixa_debug, ColPartitionGrid* part_grid, ColPartition_LIST* big_parts); }; diff --git a/textord/tabfind.cpp b/textord/tabfind.cpp index 30bad8bb3c..fff7fa5347 100644 --- a/textord/tabfind.cpp +++ b/textord/tabfind.cpp @@ -440,13 +440,8 @@ bool TabFind::FindTabVectors(TabVector_LIST* hlines, #ifndef GRAPHICS_DISABLED if (textord_tabfind_show_finaltabs) { tab_win = MakeWindow(640, 50, "FinalTabs"); - if (textord_debug_images) { - tab_win->Image(AlignedBlob::textord_debug_pix().string(), - image_origin_.x(), image_origin_.y()); - } else { - DisplayBoxes(tab_win); - DisplayTabs("FinalTabs", tab_win); - } + DisplayBoxes(tab_win); + DisplayTabs("FinalTabs", tab_win); tab_win = DisplayTabVectors(tab_win); } #endif // GRAPHICS_DISABLED @@ -1277,32 +1272,6 @@ bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, RotateBlobList(*deskew, &block->blobs); RotateBlobList(*deskew, &block->small_blobs); RotateBlobList(*deskew, &block->noise_blobs); - if (textord_debug_images) { - // Rotate the debug pix and arrange for it to be drawn at the correct - // pixel offset. - Pix* pix_grey = pixRead(AlignedBlob::textord_debug_pix().string()); - int width = pixGetWidth(pix_grey); - int height = pixGetHeight(pix_grey); - float angle = atan2(deskew->y(), deskew->x()); - // Positive angle is clockwise to pixRotate. - Pix* pix_rot = pixRotate(pix_grey, -angle, L_ROTATE_AREA_MAP, - L_BRING_IN_WHITE, width, height); - // The image must be translated by the rotation of its center, since it - // has just been rotated about its center. - ICOORD center_offset(width / 2, height / 2); - ICOORD new_center_offset(center_offset); - new_center_offset.rotate(*deskew); - image_origin_ += new_center_offset - center_offset; - // The image grew as it was rotated, so offset the (top/left) origin - // by half the change in size. y is opposite to x because it is drawn - // at ist top/left, not bottom/left. - ICOORD corner_offset((width - pixGetWidth(pix_rot)) / 2, - (pixGetHeight(pix_rot) - height) / 2); - image_origin_ += corner_offset; - pixWrite(AlignedBlob::textord_debug_pix().string(), pix_rot, IFF_PNG); - pixDestroy(&pix_grey); - pixDestroy(&pix_rot); - } // Rotate the horizontal vectors. The vertical vectors don't need // rotating as they can just be refitted. diff --git a/textord/tablefind.cpp b/textord/tablefind.cpp index b68655a57c..e2fa6cc675 100644 --- a/textord/tablefind.cpp +++ b/textord/tablefind.cpp @@ -143,7 +143,6 @@ const double kMaxXProjectionGapFactor = 2.0; const double kStrokeWidthFractionalTolerance = 0.25; const double kStrokeWidthConstantTolerance = 2.0; -BOOL_VAR(textord_dump_table_images, false, "Paint table detection output"); BOOL_VAR(textord_show_tables, false, "Show table regions"); BOOL_VAR(textord_tablefind_show_mark, false, "Debug table marking steps in detail"); @@ -371,9 +370,6 @@ void TableFinder::LocateTables(ColPartitionGrid* grid, #endif // GRAPHICS_DISABLED } - if (textord_dump_table_images) - WriteToPix(reskew); - // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. @@ -1999,80 +1995,6 @@ void TableFinder::DisplayColPartitionConnections( #endif } - -// Write debug image and text file. -// Note: This method is only for debug purpose during development and -// would not be part of checked in code -void TableFinder::WriteToPix(const FCOORD& reskew) { - // Input file must be named test1.tif - PIX* pix = pixRead("test1.tif"); - if (!pix) { - tprintf("Input file test1.tif not found.\n"); - return; - } - int img_height = pixGetHeight(pix); - int img_width = pixGetWidth(pix); - // Maximum number of text or table partitions - int num_boxes = 10; - BOXA* text_box_array = boxaCreate(num_boxes); - BOXA* table_box_array = boxaCreate(num_boxes); - GridSearch - gsearch(&clean_part_grid_); - gsearch.StartFullSearch(); - ColPartition* part; - // load colpartitions into text_box_array and table_box_array - while ((part = gsearch.NextFullSearch()) != NULL) { - TBOX box = part->bounding_box(); - box.rotate_large(reskew); - BOX* lept_box = boxCreate(box.left(), img_height - box.top(), - box.right() - box.left(), - box.top() - box.bottom()); - if (part->type() == PT_TABLE) - boxaAddBox(table_box_array, lept_box, L_INSERT); - else - boxaAddBox(text_box_array, lept_box, L_INSERT); - } - // draw colpartitions on the output image - PIX* out = pixDrawBoxa(pix, text_box_array, 3, 0xff000000); - out = pixDrawBoxa(out, table_box_array, 3, 0x0000ff00); - - BOXA* table_array = boxaCreate(num_boxes); - // text file containing detected table bounding boxes - FILE* fptr = fopen("tess-table.txt", "wb"); - GridSearch - table_search(&table_grid_); - table_search.StartFullSearch(); - ColSegment* table; - // load table boxes to table_array and write them to text file as well - while ((table = table_search.NextFullSearch()) != NULL) { - TBOX box = table->bounding_box(); - box.rotate_large(reskew); - // Since deskewing introduces negative coordinates, reskewing - // might not completely recover from that since both steps enlarge - // the actual box. Hence a box that undergoes deskewing/reskewing - // may go out of image boundaries. Crop a table box if needed to - // contain it inside the image dimensions. - box = box.intersection(TBOX(0, 0, img_width - 1, img_height - 1)); - BOX* lept_box = boxCreate(box.left(), img_height - box.top(), - box.right() - box.left(), - box.top() - box.bottom()); - boxaAddBox(table_array, lept_box, L_INSERT); - fprintf(fptr, "%d %d %d %d TABLE\n", box.left(), - img_height - box.top(), box.right(), img_height - box.bottom()); - } - fclose(fptr); - // paint table boxes on the debug image - out = pixDrawBoxa(out, table_array, 5, 0x7fff0000); - - pixWrite("out.png", out, IFF_PNG); - // memory cleanup - boxaDestroy(&text_box_array); - boxaDestroy(&table_box_array); - boxaDestroy(&table_array); - pixDestroy(&pix); - pixDestroy(&out); -} - // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. diff --git a/textord/tablefind.h b/textord/tablefind.h index 6f955e9602..49590a4f06 100644 --- a/textord/tablefind.h +++ b/textord/tablefind.h @@ -389,11 +389,6 @@ class TableFinder { void DisplayColSegmentGrid(ScrollView* win, ColSegmentGrid* grid, ScrollView::Color color); - // Write ColParitions and Tables to a PIX image - // Note: This method is only for debug purpose during development and - // would not be part of checked in code - void WriteToPix(const FCOORD& reskew); - // Merge all colpartitions in table regions to make them a single // colpartition and revert types of isolated table cells not // assigned to any table to their original types. diff --git a/textord/textlineprojection.cpp b/textord/textlineprojection.cpp index 2651a19b33..8220b95e32 100644 --- a/textord/textlineprojection.cpp +++ b/textord/textlineprojection.cpp @@ -119,6 +119,7 @@ void TextlineProjection::MoveNonTextlineBlobs( // Create a window and display the projection in it. void TextlineProjection::DisplayProjection() const { +#ifndef GRAPHICS_DISABLED int width = pixGetWidth(pix_); int height = pixGetHeight(pix_); Pix* pixc = pixCreate(width, height, 32); @@ -139,16 +140,12 @@ void TextlineProjection::DisplayProjection() const { col_data[x] = result; } } -#if 0 - // TODO(rays) uncomment when scrollview can display non-binary images. ScrollView* win = new ScrollView("Projection", 0, 0, width, height, width, height); win->Image(pixc, 0, 0); win->Update(); -#else - pixWrite("projection.png", pixc, IFF_PNG); -#endif pixDestroy(&pixc); +#endif // GRAPHICS_DISABLED } // Compute the distance of the box from the partition using curved projection