From 92d981b93a4c54f6727681968451e7de72cc8b69 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 30 Nov 2016 22:23:08 +0100 Subject: [PATCH] Change tesseract parameter -psm to --psm For compatibility reasons the old variant is still supported. Signed-off-by: Stefan Weil --- README.md | 2 +- api/tesseractmain.cpp | 7 ++++++- doc/tesseract.1 | 4 ++-- doc/tesseract.1.asc | 4 ++-- doc/tesseract.1.html | 4 ++-- doc/tesseract.1.xml | 4 ++-- testing/runtestset.sh | 2 +- 7 files changed, 16 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index cc9f3d69ae..30af585520 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ You can either [Install Tesseract via pre-built binary package](https://github.c Basic command line usage: - tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...] + tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfiles...] For more information about the various command line options use `tesseract --help` or `man tesseract`. diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 74c1507304..0ded1c08ac 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -142,7 +142,7 @@ void PrintHelpMessage(const char* program) { " -l LANG[+LANG] Specify language(s) used for OCR.\n" " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" - " -psm NUM Specify page segmentation mode.\n" + " --psm NUM Specify page segmentation mode.\n" " --oem NUM Specify OCR Engine mode.\n" "NOTE: These options must occur before any configfile.\n"; @@ -275,6 +275,11 @@ void ParseArgs(const int argc, char** argv, const char** lang, noocr = true; *list_langs = true; } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) { + // The parameter -psm is deprecated and was replaced by --psm. + // It is still supported for compatibility reasons. 
+ *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1])); + ++i; + } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1])); ++i; } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { diff --git a/doc/tesseract.1 b/doc/tesseract.1 index 95128fec99..89107f0312 100644 --- a/doc/tesseract.1 +++ b/doc/tesseract.1 @@ -84,7 +84,7 @@ Set value for control parameter\&. Multiple \-c arguments are allowed\&. The language to use\&. If none is specified, English is assumed\&. Multiple languages may be specified, separated by plus characters\&. Tesseract uses 3\-character ISO 639\-2 language codes\&. (See LANGUAGES) .RE .PP -\fI\-psm N\fR +\fI\-\-psm N\fR .RS 4 Set Tesseract to only run a subset of layout analysis and assume a certain form of image\&. The options for \fBN\fR @@ -139,7 +139,7 @@ pdf \- Output in pdf instead of a text file\&. .RE .RE .sp -\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-psm N\fR must occur before any \fIconfigfile\fR\&. +\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-\-psm N\fR must occur before any \fIconfigfile\fR\&. .SH "SINGLE OPTIONS" .PP \fI\-v\fR diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc index 56627a9392..237299fe51 100644 --- a/doc/tesseract.1.asc +++ b/doc/tesseract.1.asc @@ -54,7 +54,7 @@ OPTIONS Multiple languages may be specified, separated by plus characters. Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) -'-psm N':: +'--psm N':: Set Tesseract to only run a subset of layout analysis and assume a certain form of image. The options for *N* are: @@ -78,7 +78,7 @@ OPTIONS * hocr - Output in hOCR format instead of as a text file. * pdf - Output in pdf instead of a text file. -*Nota Bene:* The options '-l lang' and '-psm N' must occur +*Nota Bene:* The options '-l lang' and '--psm N' must occur before any 'configfile'. 
diff --git a/doc/tesseract.1.html b/doc/tesseract.1.html index 90c5dae78c..5e37d31170 100644 --- a/doc/tesseract.1.html +++ b/doc/tesseract.1.html @@ -847,7 +847,7 @@

OPTIONS

--psm N +--psm N

@@ -893,7 +893,7 @@

OPTIONS

-

Nota Bene: The options -l lang and -psm N must occur +

Nota Bene: The options -l lang and --psm N must occur before any configfile.

diff --git a/doc/tesseract.1.xml b/doc/tesseract.1.xml index 2f971caa7b..842c5acd61 100644 --- a/doc/tesseract.1.xml +++ b/doc/tesseract.1.xml @@ -130,7 +130,7 @@ at Google since then. --psm N +--psm N @@ -176,7 +176,7 @@ pdf - Output in pdf instead of a text file. -Nota Bene: The options -l lang and -psm N must occur +Nota Bene: The options -l lang and --psm N must occur before any configfile. diff --git a/testing/runtestset.sh b/testing/runtestset.sh index 0c9595f9aa..5c2a7e7aa2 100755 --- a/testing/runtestset.sh +++ b/testing/runtestset.sh @@ -64,7 +64,7 @@ do srcdir="$imdir" fi # echo "$srcdir/$page.tif" - $tess $srcdir/$page.tif $resdir/$page -psm 6 $config 2>&1 |grep -v "OCR Engine" + $tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine" if [ -r times.txt ] then read t