Skip to content

Commit

Permalink
Remove Tesseract parameter "include_page_breaks" and use FF by default
Browse files Browse the repository at this point in the history
Now Tesseract adds a page break (normally form feed) by default.

It is still possible to suppress page breaks by setting an empty
page_separator.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Sep 19, 2017
1 parent 3bb573a commit aa6eb6b
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 10 deletions.
4 changes: 1 addition & 3 deletions api/renderer.cpp
Expand Up @@ -132,10 +132,8 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {

AppendString(utf8.get());

-bool pageBreak = false;
-api->GetBoolVariable("include_page_breaks", &pageBreak);
const char* pageSeparator = api->GetStringVariable("page_separator");
-if (pageBreak) {
+if (pageSeparator != nullptr && *pageSeparator != '\0') {
AppendString(pageSeparator);
}

Expand Down
4 changes: 0 additions & 4 deletions ccmain/tesseractclass.cpp
Expand Up @@ -505,10 +505,6 @@ Tesseract::Tesseract()
this->params()),
BOOL_MEMBER(preserve_interword_spaces, false,
"Preserve multiple interword spaces", this->params()),
-BOOL_MEMBER(include_page_breaks, FALSE,
-"Include page separator string in output text after each "
-"image/page.",
-this->params()),
STRING_MEMBER(page_separator, "\f",
"Page separator (default is form feed control character)",
this->params()),
Expand Down
3 changes: 0 additions & 3 deletions ccmain/tesseractclass.h
Expand Up @@ -1103,9 +1103,6 @@ class Tesseract : public Wordrec {
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
BOOL_VAR_H(preserve_interword_spaces, false,
"Preserve multiple interword spaces");
-BOOL_VAR_H(include_page_breaks, false,
-"Include page separator string in output text after each "
-"image/page.");
STRING_VAR_H(page_separator, "\f",
"Page separator (default is form feed control character)");

Expand Down

0 comments on commit aa6eb6b

Please sign in to comment.