Skip to content

Commit

Permalink
added row attributes to hocr output
Browse files Browse the repository at this point in the history
  • Loading branch information
zdenop committed May 17, 2015
1 parent 917e994 commit 438edd6
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 0 deletions.
6 changes: 6 additions & 0 deletions api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,7 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {

int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
float row_height, descenders, ascenders; // row attributes
bool font_info = false;
GetBoolVariable("hocr_font_info", &font_info);

Expand Down Expand Up @@ -1480,7 +1481,12 @@ char* TessBaseAPI::GetHOCRText(struct ETEXT_DESC* monitor, int page_number) {
AddBoxTohOCR(res_it, RIL_PARA, &hocr_str);
}
if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
int fontsize;
hocr_str.add_str_int("\n <span class='ocr_line' id='line_", page_id);
res_it->RowAttributes(&row_height, &descenders, &ascenders);
hocr_str.add_str_int("' size='", row_height);
hocr_str.add_str_int("' descenders='", descenders * -1);
hocr_str.add_str_int("' ascenders='", ascenders);
hocr_str.add_str_int("_", lcnt);
AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str);
}
Expand Down
9 changes: 9 additions & 0 deletions ccmain/ltrresultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const {
return 0.0f;
}

// Writes the vertical metrics of the current row through the three output
// pointers. The row height is the row's x-height plus its ascenders minus
// its descenders; descenders and ascenders are reported as stored on the row.
// All three pointers are dereferenced unconditionally.
void LTRResultIterator::RowAttributes(float* row_height,
                                      float* descenders,
                                      float* ascenders) const {
  // Read each metric once up front so every output is derived from the
  // same values.
  const float x_height = it_->row()->row->x_height();
  const float row_ascenders = it_->row()->row->ascenders();
  const float row_descenders = it_->row()->row->descenders();

  *row_height = x_height + row_ascenders - row_descenders;
  *descenders = row_descenders;
  *ascenders = row_ascenders;
}

// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
Expand Down
5 changes: 5 additions & 0 deletions ccmain/ltrresultiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ class TESS_API LTRResultIterator : public PageIterator {
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;

// Reports vertical metrics of the row the iterator is currently on, writing
// them through the three output pointers:
//   row_height - the row's x-height plus its ascenders minus its descenders.
//   descenders - the row's descenders value, passed through unchanged.
//   ascenders  - the row's ascenders value, passed through unchanged.
// All three pointers are dereferenced unconditionally, so none may be NULL.
// NOTE(review): descenders is presumably zero or negative, since the hOCR
// writer negates it before output - confirm against the row data.
void RowAttributes(float* row_height,
float* descenders,
float* ascenders) const;

// ============= Functions that refer to words only ============.

// Returns the font attributes of the current word. If iterating at a higher
Expand Down

0 comments on commit 438edd6

Please sign in to comment.