Fixed issue #557

tesseract-ocr · Jan 26, 2017 · a1c22fb · a1c22fb
1 parent b453f74
commit a1c22fb
Show file tree

Hide file tree

Showing 23 changed files with 131 additions and 423 deletions.
diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp
@@ -403,7 +403,7 @@ int main(int argc, char** argv) {
 
 #if !defined(DEBUG)
   // Disable debugging and informational messages from Leptonica.
-  setMsgSeverity(L_SEVERITY_WARNING);
+  setMsgSeverity(L_SEVERITY_ERROR);
 #endif
 
 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)

diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp
@@ -176,7 +176,7 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks,
   tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix,
                                             &vertical_x, &vertical_y,
                                             NULL, &v_lines, &h_lines);
-  Pix* im_pix = tesseract::ImageFind::FindImages(pix);
+  Pix* im_pix = tesseract::ImageFind::FindImages(pix, nullptr);
   if (im_pix != NULL) {
     pixSubtract(pix, pix, im_pix);
     pixDestroy(&im_pix);

diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp
@@ -37,6 +37,7 @@
 #include "blobbox.h"
 #include "blread.h"
 #include "colfind.h"
+#include "debugpixa.h"
 #include "equationdetect.h"
 #include "imagefind.h"
 #include "linefind.h"
@@ -176,28 +177,6 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
   return auto_page_seg_ret_val;
 }
 
-// Helper writes a grey image to a file for use by scrollviewer.
-// Normally for speed we don't display the image in the layout debug windows.
-// If textord_debug_images is true, we draw the image as a background to some
-// of the debug windows. printable determines whether these
-// images are optimized for printing instead of screen display.
-static void WriteDebugBackgroundImage(bool printable, Pix* pix_binary) {
-  Pix* grey_pix = pixCreate(pixGetWidth(pix_binary),
-                            pixGetHeight(pix_binary), 8);
-  // Printable images are light grey on white, but for screen display
-  // they are black on dark grey so the other colors show up well.
-  if (printable) {
-    pixSetAll(grey_pix);
-    pixSetMasked(grey_pix, pix_binary, 192);
-  } else {
-    pixSetAllArbitrary(grey_pix, 64);
-    pixSetMasked(grey_pix, pix_binary, 0);
-  }
-  AlignedBlob::IncrementDebugPix();
-  pixWrite(AlignedBlob::textord_debug_pix().string(), grey_pix, IFF_PNG);
-  pixDestroy(&grey_pix);
-}
-
 /**
  * Auto page segmentation. Divide the page image into blocks of uniform
  * text linespacing and images.
@@ -226,9 +205,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
                            TO_BLOCK_LIST* to_blocks,
                            BLOBNBOX_LIST* diacritic_blobs, Tesseract* osd_tess,
                            OSResults* osr) {
-  if (textord_debug_images) {
-    WriteDebugBackgroundImage(textord_debug_printable, pix_binary_);
-  }
   Pix* photomask_pix = NULL;
   Pix* musicmask_pix = NULL;
   // The blocks made by the ColumnFinder. Moved to blocks before return.
@@ -250,9 +226,10 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
     if (equ_detect_) {
       finder->SetEquationDetect(equ_detect_);
     }
-    result = finder->FindBlocks(
-        pageseg_mode, scaled_color_, scaled_factor_, to_block, photomask_pix,
-        pix_thresholds_, pix_grey_, &found_blocks, diacritic_blobs, to_blocks);
+    result = finder->FindBlocks(pageseg_mode, scaled_color_, scaled_factor_,
+                                to_block, photomask_pix, pix_thresholds_,
+                                pix_grey_, &pixa_debug_, &found_blocks,
+                                diacritic_blobs, to_blocks);
     if (result >= 0)
       finder->GetDeskewVectors(&deskew_, &reskew_);
     delete finder;
@@ -265,11 +242,6 @@ int Tesseract::AutoPageSeg(PageSegMode pageseg_mode, BLOCK_LIST* blocks,
   BLOCK_IT block_it(blocks);
   // Move the found blocks to the input/output blocks.
   block_it.add_list_after(&found_blocks);
-
-  if (textord_debug_images) {
-    // The debug image is no longer needed so delete it.
-    unlink(AlignedBlob::textord_debug_pix().string());
-  }
   return result;
 }
 
@@ -311,19 +283,21 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation(
 
   ASSERT_HOST(pix_binary_ != NULL);
   if (tessedit_dump_pageseg_images) {
-    pixWrite("tessinput.png", pix_binary_, IFF_PNG);
+    pixa_debug_.AddPix(pix_binary_, "PageSegInput");
   }
   // Leptonica is used to find the rule/separator lines in the input.
   LineFinder::FindAndRemoveLines(source_resolution_,
                                  textord_tabfind_show_vlines, pix_binary_,
                                  &vertical_x, &vertical_y, music_mask_pix,
                                  &v_lines, &h_lines);
-  if (tessedit_dump_pageseg_images)
-    pixWrite("tessnolines.png", pix_binary_, IFF_PNG);
+  if (tessedit_dump_pageseg_images) {
+    pixa_debug_.AddPix(pix_binary_, "NoLines");
+  }
   // Leptonica is used to find a mask of the photo regions in the input.
-  *photo_mask_pix = ImageFind::FindImages(pix_binary_);
-  if (tessedit_dump_pageseg_images)
-    pixWrite("tessnoimages.png", pix_binary_, IFF_PNG);
+  *photo_mask_pix = ImageFind::FindImages(pix_binary_, &pixa_debug_);
+  if (tessedit_dump_pageseg_images) {
+    pixa_debug_.AddPix(pix_binary_, "NoImages");
+  }
   if (!PSM_COL_FIND_ENABLED(pageseg_mode)) v_lines.clear();
 
   // The rest of the algorithm uses the usual connected components.

diff --git a/ccstruct/Makefile.am b/ccstruct/Makefile.am
@@ -12,7 +12,7 @@ endif
 include_HEADERS = publictypes.h
 noinst_HEADERS = \
     blamer.h blckerr.h blobbox.h blobs.h blread.h boxread.h boxword.h ccstruct.h coutln.h crakedge.h \
-    detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
+    debugpixa.h detlinefit.h dppoint.h fontinfo.h genblob.h hpdsizes.h \
     imagedata.h \
     ipoints.h \
     linlsq.h matrix.h mod128.h normalis.h \

diff --git a/ccstruct/debugpixa.h b/ccstruct/debugpixa.h
@@ -0,0 +1,52 @@
+#ifndef TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+#define TESSERACT_CCSTRUCT_DEBUGPIXA_H_
+
+#include "allheaders.h"
+
+namespace tesseract {
+
+// Collects captioned debug images in a Pixa; WritePDF saves them to one PDF.
+// NOTE(review): owns raw pointers but declares no copy control; avoid copying.
+class DebugPixa {
+ public:
+  // TODO(rays) add another constructor with size control.
+  DebugPixa() {
+    pixa_ = pixaCreate(0);
+    fonts_ = bmfCreate(nullptr, 14);
+  }
+  // Destroys the image collection and the caption font. Nothing is written
+  // here: call WritePDF beforehand to save any accumulated images.
+  ~DebugPixa() {
+    pixaDestroy(&pixa_);
+    bmfDestroy(&fonts_);
+  }
+
+  // Renders caption onto a new image derived from pix (placement flag is
+  // L_ADD_BELOW, color chosen from the pix depth) and appends it to pixa_.
+  void AddPix(const Pix* pix, const char* caption) {
+    int depth = pixGetDepth(const_cast<Pix*>(pix));
+    int color = depth < 8 ? 1 : (depth > 8 ? 0x00ff0000 : 0x80);
+    Pix* pix_debug = pixAddSingleTextblock(
+        const_cast<Pix*>(pix), fonts_, caption, color, L_ADD_BELOW, nullptr);
+    pixaAddPix(pixa_, pix_debug, L_INSERT);
+  }
+
+  // If any images have been added, writes them all to a PDF at filename and
+  // then clears the collection. Does nothing when the collection is empty.
+  void WritePDF(const char* filename) {
+    if (pixaGetCount(pixa_) > 0) {
+      pixaConvertToPdf(pixa_, 300, 1.0f, 0, 0, "AllDebugImages", filename);
+      pixaClear(pixa_);
+    }
+  }
+
+ private:
+  // The collection of images to put in the PDF.
+  Pixa* pixa_;
+  // The fonts used to draw text captions.
+  L_Bmf* fonts_;
+};
+
+}  // namespace tesseract
+
+#endif  // TESSERACT_CCSTRUCT_DEBUGPIXA_H_
diff --git a/classify/trainingsampleset.cpp b/classify/trainingsampleset.cpp
@@ -487,81 +487,6 @@ void TrainingSampleSet::IndexFeatures(const IntFeatureSpace& feature_space) {
     samples_[s]->IndexFeatures(feature_space);
 }
 
-// Delete outlier samples with few features that are shared with others.
-// IndexFeatures must have been called already.
-void TrainingSampleSet::DeleteOutliers(const IntFeatureSpace& feature_space,
-                                       bool debug) {
-  if (font_class_array_ == NULL)
-    OrganizeByFontAndClass();
-  Pixa* pixa = NULL;
-  if (debug)
-    pixa = pixaCreate(0);
-  GenericVector<int> feature_counts;
-  int fs_size = feature_space.Size();
-  int font_size = font_id_map_.CompactSize();
-  for (int font_index = 0; font_index < font_size; ++font_index) {
-    for (int c = 0; c < unicharset_size_; ++c) {
-      // Create a histogram of the features used by all samples of this
-      // font/class combination.
-      feature_counts.init_to_size(fs_size, 0);
-      FontClassInfo& fcinfo = (*font_class_array_)(font_index, c);
-      int sample_count = fcinfo.samples.size();
-      if (sample_count < kMinOutlierSamples)
-        continue;
-      for (int i = 0; i < sample_count; ++i) {
-        int s = fcinfo.samples[i];
-        const GenericVector<int>& features = samples_[s]->indexed_features();
-        for (int f = 0; f < features.size(); ++f) {
-          ++feature_counts[features[f]];
-        }
-      }
-      for (int i = 0; i < sample_count; ++i) {
-        int s = fcinfo.samples[i];
-        const TrainingSample& sample = *samples_[s];
-        const GenericVector<int>& features = sample.indexed_features();
-        // A feature that has a histogram count of 1 is only used by this
-        // sample, making it 'bad'. All others are 'good'.
-        int good_features = 0;
-        int bad_features = 0;
-        for (int f = 0; f < features.size(); ++f) {
-          if (feature_counts[features[f]] > 1)
-            ++good_features;
-          else
-            ++bad_features;
-        }
-        // If more than 1/3 features are bad, then this is an outlier.
-        if (bad_features * 2 > good_features) {
-          tprintf("Deleting outlier sample of %s, %d good, %d bad\n",
-                  SampleToString(sample).string(),
-                  good_features, bad_features);
-          if (debug) {
-            pixaAddPix(pixa, sample.RenderToPix(&unicharset_), L_INSERT);
-            // Add the previous sample as well, so it is easier to see in
-            // the output what is wrong with this sample.
-            int t;
-            if (i == 0)
-              t = fcinfo.samples[1];
-            else
-              t = fcinfo.samples[i - 1];
-            const TrainingSample &csample = *samples_[t];
-            pixaAddPix(pixa, csample.RenderToPix(&unicharset_), L_INSERT);
-          }
-          // Mark the sample for deletion.
-          KillSample(samples_[s]);
-        }
-      }
-    }
-  }
-  // Truly delete all bad samples and renumber everything.
-  DeleteDeadSamples();
-  if (pixa != NULL) {
-    Pix* pix = pixaDisplayTiledInRows(pixa, 1, 2600, 1.0, 0, 10, 10);
-    pixaDestroy(&pixa);
-    pixWrite("outliers.png", pix, IFF_PNG);
-    pixDestroy(&pix);
-  }
-}
-
 // Marks the given sample index for deletion.
 // Deletion is actually completed by DeleteDeadSamples.
 void TrainingSampleSet::KillSample(TrainingSample* sample) {
@@ -745,12 +670,6 @@ void TrainingSampleSet::ComputeCanonicalSamples(const IntFeatureMap& map,
   if (debug) {
     tprintf("Global worst dist = %g, between sample %d and %d\n",
             global_worst_dist, worst_s1, worst_s2);
-    Pix* pix1 = DebugSample(unicharset_, samples_[worst_s1]);
-    Pix* pix2 = DebugSample(unicharset_, samples_[worst_s2]);
-    pixOr(pix1, pix1, pix2);
-    pixWrite("worstpair.png", pix1, IFF_PNG);
-    pixDestroy(&pix1);
-    pixDestroy(&pix2);
   }
 }
 

diff --git a/classify/trainingsampleset.h b/classify/trainingsampleset.h
@@ -171,10 +171,6 @@ class TrainingSampleSet {
   // Generates indexed features for all samples with the supplied feature_space.
   void IndexFeatures(const IntFeatureSpace& feature_space);
 
-  // Delete outlier samples with few features that are shared with others.
-  // IndexFeatures must have been called already.
-  void DeleteOutliers(const IntFeatureSpace& feature_space, bool debug);
-
   // Marks the given sample for deletion.
   // Deletion is actually completed by DeleteDeadSamples.
   void KillSample(TrainingSample* sample);

diff --git a/textord/alignedblob.cpp b/textord/alignedblob.cpp
@@ -30,7 +30,6 @@ INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle");
 INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle");
 INT_VAR(textord_testregion_right, MAX_INT32, "Right edge of debug rectangle");
 INT_VAR(textord_testregion_bottom, MAX_INT32, "Bottom edge of debug rectangle");
-BOOL_VAR(textord_debug_images, false, "Use greyed image background for debug");
 BOOL_VAR(textord_debug_printable, false, "Make debug windows printable");
 
 namespace tesseract {
@@ -64,25 +63,6 @@ const double kMinTabGradient = 4.0;
 // If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor.
 const int kMaxSkewFactor = 15;
 
-// Constant part of textord_debug_pix_.
-const char* kTextordDebugPix = "psdebug_pix";
-
-// Name of image file to use if textord_debug_images is true.
-STRING AlignedBlob::textord_debug_pix_ = kTextordDebugPix;
-// Index to image file to use if textord_debug_images is true.
-int AlignedBlob::debug_pix_index_ = 0;
-
-// Increment the serial number counter and set the string to use
-// for a filename if textord_debug_images is true.
-void AlignedBlob::IncrementDebugPix() {
-  ++debug_pix_index_;
-  textord_debug_pix_ = kTextordDebugPix;
-  char numbuf[32];
-  snprintf(numbuf, sizeof(numbuf), "%d", debug_pix_index_);
-  textord_debug_pix_ += numbuf;
-  textord_debug_pix_ += ".pix";
-}
-
 // Constructor to set the parameters for finding aligned and ragged tabs.
 // Vertical_x and vertical_y are the current estimates of the true vertical
 // direction (up) in the image. Height is the height of the starter blob.

diff --git a/textord/alignedblob.h b/textord/alignedblob.h
@@ -29,8 +29,6 @@
 extern INT_VAR_H(textord_debug_bugs, 0,
                  "Turn on output related to bugs in tab finding");
 extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding");
-extern BOOL_VAR_H(textord_debug_images, false,
-                "Use greyed image background for debug");
 extern BOOL_VAR_H(textord_debug_printable, false,
                   "Make debug windows printable");
 
@@ -102,17 +100,6 @@ class AlignedBlob : public BlobGrid {
                                    BLOBNBOX* bbox,
                                    int* vertical_x, int* vertical_y);
 
-  // Increment the serial number counter and set the string to use
-  // for a filename if textord_debug_images is true.
-  static void IncrementDebugPix();
-
-  // Return the string to use for a filename if textord_debug_images is true.
-  // Use IncrementDebugPix first to set the filename, and each time is
-  // to be incremented.
-  static const STRING& textord_debug_pix() {
-    return textord_debug_pix_;
-  }
-
  private:
   // Find a set of blobs that are aligned in the given vertical
   // direction with the given blob. Returns a list of aligned
@@ -132,11 +119,6 @@ class AlignedBlob : public BlobGrid {
   BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p,
                             bool top_to_bottom, BLOBNBOX* bbox,
                             int x_start, int* end_y);
-
-  // Name of image file to use if textord_debug_images is true.
-  static STRING textord_debug_pix_;
-  // Index to image file to use if textord_debug_images is true.
-  static int debug_pix_index_;
 };
 
 }  // namespace tesseract.