Skip to content

Commit

Permalink
Fix wrong font attributes in hOCR output
Browse files Browse the repository at this point in the history
Instrumented code throws this runtime error during OCR:

    ../../src/api/baseapi.cpp:1616:5: runtime error: load of value 128,
      which is not a valid value for type 'bool'
    ../../src/api/baseapi.cpp:1627:5: runtime error: load of value 128,
      which is not a valid value for type 'bool'

If there is no font information (typical for Tesseract with an LSTM model),
the font attributes got random values, resulting in wrong hOCR output.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Dec 4, 2018
1 parent 0bdae8f commit c9e85ab
Showing 1 changed file with 35 additions and 19 deletions.
54 changes: 35 additions & 19 deletions src/ccmain/ltrresultiterator.cpp
Expand Up @@ -171,27 +171,43 @@ const char* LTRResultIterator::WordFontAttributes(bool* is_bold,
bool* is_smallcaps,
int* pointsize,
int* font_id) const {
if (it_->word() == nullptr) return nullptr; // Already at the end!
float row_height = it_->row()->row->x_height() +
it_->row()->row->ascenders() - it_->row()->row->descenders();
// Convert from pixels to printers points.
*pointsize = scaled_yres_ > 0
? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
: 0;
if (it_->word()->fontinfo == nullptr) {
const char* result = nullptr;

// A null word means the iterator is already at the end: nothing below may
// dereference it_->word() or it_->row(), so only the point size is set here
// and result stays nullptr.
if (it_->word() == nullptr) {
  // Already at the end!
  *pointsize = 0;
} else {
  float row_height = it_->row()->row->x_height() +
                     it_->row()->row->ascenders() -
                     it_->row()->row->descenders();
  // Convert from pixels to printers points.
  *pointsize = scaled_yres_ > 0
      ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
      : 0;
  const FontInfo* font_info = it_->word()->fontinfo;
  if (font_info) {
    // Font information available.
    *font_id = font_info->universal_id;
    *is_bold = font_info->is_bold();
    *is_italic = font_info->is_italic();
    *is_underlined = false;  // TODO(rays) fix this!
    *is_monospace = font_info->is_fixed_pitch();
    *is_serif = font_info->is_serif();
    *is_smallcaps = it_->word()->small_caps;
    // A non-null name is what marks the attributes above as valid to the
    // code that follows this statement.
    result = font_info->name;
  }
}

if (!result) {
*is_bold = false;
*is_italic = false;
*is_underlined = false;
*is_monospace = false;
*is_serif = false;
*is_smallcaps = false;
*font_id = -1;
return nullptr; // No font information.
}
const FontInfo& font_info = *it_->word()->fontinfo;
*font_id = font_info.universal_id;
*is_bold = font_info.is_bold();
*is_italic = font_info.is_italic();
*is_underlined = false; // TODO(rays) fix this!
*is_monospace = font_info.is_fixed_pitch();
*is_serif = font_info.is_serif();
*is_smallcaps = it_->word()->small_caps;

return font_info.name;

return result;
}

// Returns the name of the language used to recognize this word.
Expand Down

0 comments on commit c9e85ab

Please sign in to comment.