print help for tesstrain.sh; fixes #1469

tesseract-ocr · Oct 2, 2018 · 7dbf5a0 · 7dbf5a0
1 parent 57a6f1d
commit 7dbf5a0
Showing 1 changed file with 38 additions and 30 deletions.
diff --git a/src/training/tesstrain.sh b/src/training/tesstrain.sh
@@ -14,39 +14,47 @@
 # Tesseract.  For a detailed description of the phases, see
 # https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract
 #
-# USAGE:
-#
-# tesstrain.sh
-#    --fontlist FONTS           # A list of fontnames to train on.
-#    --fonts_dir FONTS_PATH     # Path to font files.
-#    --lang LANG_CODE           # ISO 639 code.
-#    --langdata_dir DATADIR     # Path to tesseract/training/langdata directory.
-#    --output_dir OUTPUTDIR     # Location of output traineddata file.
-#    --save_box_tiff                # Save box/tiff pairs along with lstmf files.
-#    --overwrite                # Safe to overwrite files in output_dir.
-#    --linedata_only            # Only generate training data for lstmtraining.
-#    --run_shape_clustering     # Run shape clustering (use for Indic langs).
-#    --exposures EXPOSURES      # A list of exposure levels to use (e.g. "-1 0 1").
-#
-# OPTIONAL flags for input data. If unspecified we will look for them in
-# the langdata_dir directory.
-#    --training_text TEXTFILE   # Text to render and use for training.
-#    --wordlist WORDFILE        # Word list for the language ordered by
-#                               # decreasing frequency.
-#
-# OPTIONAL flag to specify location of existing traineddata files, required
-# during feature extraction. If unspecified will use TESSDATA_PREFIX defined in
-# the current environment.
-#    --tessdata_dir TESSDATADIR     # Path to tesseract/tessdata directory.
-#
-# NOTE:
-# The font names specified in --fontlist need to be recognizable by Pango using
-# fontconfig. An easy way to list the canonical names of all fonts available on
-# your system is to run text2image with --list_available_fonts and the
-# appropriate --fonts_dir path.
 
+display_usage() {
+echo -e "USAGE: tesstrain.sh
+     --fontlist FONTS           # A list of fontnames to train on.
+     --fonts_dir FONTS_PATH     # Path to font files.
+     --lang LANG_CODE           # ISO 639 code.
+     --langdata_dir DATADIR     # Path to tesseract/training/langdata directory.
+     --output_dir OUTPUTDIR     # Location of output traineddata file.
+     --save_box_tiff                # Save box/tiff pairs along with lstmf files.
+     --overwrite                # Safe to overwrite files in output_dir.
+     --linedata_only            # Only generate training data for lstmtraining.
+     --run_shape_clustering     # Run shape clustering (use for Indic langs).
+     --exposures EXPOSURES      # A list of exposure levels to use (e.g. \"-1 0 1\").
+
+  OPTIONAL flags for input data. If unspecified we will look for them in
+  the langdata_dir directory.
+     --training_text TEXTFILE   # Text to render and use for training.
+     --wordlist WORDFILE        # Word list for the language ordered by
+                                # decreasing frequency.
+
+  OPTIONAL flag to specify location of existing traineddata files, required
+  during feature extraction. If unspecified will use TESSDATA_PREFIX defined in
+  the current environment.
+     --tessdata_dir TESSDATADIR     # Path to tesseract/tessdata directory.
+
+  NOTE:
+  The font names specified in --fontlist need to be recognizable by Pango using
+  fontconfig. An easy way to list the canonical names of all fonts available on
+  your system is to run text2image with --list_available_fonts and the
+  appropriate --fonts_dir path."
+}
 
 source "$(dirname $0)/tesstrain_utils.sh"
+if [[ "$1" == "--help" || "$1" == "-h" ]]; then
+    display_usage
+    exit 0
+fi
+if [ $# == 0 ]; then
+    display_usage
+    exit 1
+fi
 
 ARGV=("$@")
 parse_flags