Skip to content

Commit

Permalink
Allow user to specify dpi for input image
Browse files (browse the repository at this point in the history)
  • Loading branch information
zdenop committed Sep 28, 2018
1 parent 345e5ee commit a0564fd
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 13 deletions.
23 changes: 17 additions & 6 deletions src/api/baseapi.cpp
Expand Up @@ -2320,12 +2320,22 @@ bool TessBaseAPI::Threshold(Pix** pix) {
if (*pix != nullptr)
pixDestroy(pix);
// Zero resolution messes up the algorithms, so make sure it is credible.
int user_dpi = 0;
bool a = GetIntVariable("user_defined_dpi", &user_dpi);
int y_res = thresholder_->GetScaledYResolution();
if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) {
// Use the minimum default resolution, as it is safer to under-estimate
// than over-estimate resolution.
tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", y_res,
kMinCredibleResolution);
if (user_dpi && (user_dpi < kMinCredibleResolution ||
user_dpi > kMaxCredibleResolution)) {
tprintf("Warning: User defined image dpi is outside of expected range "
"(%d - %d)!\n",
kMinCredibleResolution, kMaxCredibleResolution);
}
// Always use user defined dpi
if (user_dpi) {
thresholder_->SetSourceYResolution(user_dpi);
} else if (y_res < kMinCredibleResolution ||
y_res > kMaxCredibleResolution) {
tprintf("Warning: Invalid resolution %d dpi. Using %d instead.\n",
y_res, kMinCredibleResolution);
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
PageSegMode pageseg_mode =
Expand All @@ -2350,7 +2360,8 @@ bool TessBaseAPI::Threshold(Pix** pix) {
kMinCredibleResolution,
kMaxCredibleResolution);
if (estimated_res != thresholder_->GetScaledEstimatedResolution()) {
tprintf("Estimated resolution %d out of range! Corrected to %d\n",
tprintf("Estimated internal resolution %d out of range! "
"Corrected to %d.\n",
thresholder_->GetScaledEstimatedResolution(), estimated_res);
}
tesseract_->set_source_resolution(estimated_res);
Expand Down
23 changes: 16 additions & 7 deletions src/api/tesseractmain.cpp
Expand Up @@ -165,6 +165,7 @@ static void PrintHelpExtra(const char* program) {
" --tessdata-dir PATH Specify the location of tessdata path.\n"
" --user-words PATH Specify the location of user words file.\n"
" --user-patterns PATH Specify the location of user patterns file.\n"
" --dpi VALUE Specify DPI for input image.\n"
" -l LANG[+LANG] Specify language(s) used for OCR.\n"
" -c VAR=VALUE Set value for config variables.\n"
" Multiple -c arguments are allowed.\n"
Expand Down Expand Up @@ -288,10 +289,9 @@ static void checkArgValues(int arg, const char* mode, int count) {
// NOTE: arg_i is used here to avoid ugly *i so many times in this function
static void ParseArgs(const int argc, char** argv, const char** lang,
const char** image, const char** outputbase,
const char** datapath,
bool* list_langs, bool* print_parameters,
GenericVector<STRING>* vars_vec,
GenericVector<STRING>* vars_values, int* arg_i,
const char** datapath, l_int32* dpi, bool* list_langs,
bool* print_parameters, GenericVector<STRING>* vars_vec,
GenericVector<STRING>* vars_values, l_int32* arg_i,
tesseract::PageSegMode* pagesegmode,
tesseract::OcrEngineMode* enginemode) {
bool noocr = false;
Expand Down Expand Up @@ -324,6 +324,9 @@ static void ParseArgs(const int argc, char** argv, const char** lang,
} else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
*datapath = argv[i + 1];
++i;
} else if (strcmp(argv[i], "--dpi") == 0 && i + 1 < argc) {
*dpi = atoi(argv[i + 1]);
++i;
} else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
vars_vec->push_back("user_words_file");
vars_values->push_back(argv[i + 1]);
Expand Down Expand Up @@ -456,6 +459,7 @@ int main(int argc, char** argv) {
const char* datapath = nullptr;
bool list_langs = false;
bool print_parameters = false;
l_int32 dpi = 0;
int arg_i = 1;
tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
#ifdef DISABLED_LEGACY_ENGINE
Expand All @@ -479,9 +483,9 @@ int main(int argc, char** argv) {
TIFFSetWarningHandler(Win32WarningHandler);
#endif /* HAVE_TIFFIO_H && _WIN32 */

ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
&print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
&enginemode);
ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &dpi,
&list_langs, &print_parameters, &vars_vec, &vars_values, &arg_i,
&pagesegmode, &enginemode);

if (lang == nullptr) {
// Set default language if none was given.
Expand Down Expand Up @@ -528,6 +532,11 @@ int main(int argc, char** argv) {

FixPageSegMode(&api, pagesegmode);

if (dpi) {
char dpi_string[255];
snprintf(dpi_string, 254, "%d", dpi);
api.SetVariable("user_defined_dpi", dpi_string);
}
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
int ret_val = EXIT_SUCCESS;

Expand Down
2 changes: 2 additions & 0 deletions src/ccmain/tesseractclass.cpp
Expand Up @@ -395,6 +395,8 @@ Tesseract::Tesseract()
"Create PDF with only one invisible text layer",
this->params()),
INT_MEMBER(jpg_quality, 85, "Set JPEG quality level", this->params()),
INT_MEMBER(user_defined_dpi, 0, "Specify DPI for input image",
this->params()),
STRING_MEMBER(unrecognised_char, "|",
"Output char for unidentified blobs", this->params()),
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
Expand Down
1 change: 1 addition & 0 deletions src/ccmain/tesseractclass.h
Expand Up @@ -1042,6 +1042,7 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(textonly_pdf, false,
"Create PDF with only one invisible text layer");
INT_VAR_H(jpg_quality, 85, "Set JPEG quality level");
INT_VAR_H(user_defined_dpi, 0, "Specify DPI for input image");
STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level");
Expand Down

0 comments on commit a0564fd

Please sign in to comment.