Skip to content

Commit

Permalink
print help for tesstrain.sh; fixes #1469
Browse files Browse the repository at this point in the history
  • Loading branch information
zdenop committed Oct 2, 2018
1 parent 57a6f1d commit 7dbf5a0
Showing 1 changed file with 38 additions and 30 deletions.
68 changes: 38 additions & 30 deletions src/training/tesstrain.sh
Expand Up @@ -14,39 +14,47 @@
# Tesseract. For a detailed description of the phases, see
# https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract
#
# USAGE:
#
# tesstrain.sh
# --fontlist FONTS # A list of fontnames to train on.
# --fonts_dir FONTS_PATH # Path to font files.
# --lang LANG_CODE # ISO 639 code.
# --langdata_dir DATADIR # Path to tesseract/training/langdata directory.
# --output_dir OUTPUTDIR # Location of output traineddata file.
# --save_box_tiff # Save box/tiff pairs along with lstmf files.
# --overwrite # Safe to overwrite files in output_dir.
# --linedata_only # Only generate training data for lstmtraining.
# --run_shape_clustering # Run shape clustering (use for Indic langs).
# --exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
#
# OPTIONAL flags for input data. If unspecified we will look for them in
# the langdata_dir directory.
# --training_text TEXTFILE # Text to render and use for training.
# --wordlist WORDFILE # Word list for the language ordered by
# # decreasing frequency.
#
# OPTIONAL flag to specify the location of existing traineddata files, which
# are required during feature extraction. If unspecified, the TESSDATA_PREFIX
# defined in the current environment is used.
# --tessdata_dir TESSDATADIR # Path to tesseract/tessdata directory.
#
# NOTE:
# The font names specified in --fontlist need to be recognizable by Pango using
# fontconfig. An easy way to list the canonical names of all fonts available on
# your system is to run text2image with --list_available_fonts and the
# appropriate --fonts_dir path.

#######################################
# Print the command-line help for tesstrain.sh.
# Arguments: none
# Outputs:   writes the usage text to stdout
# Returns:   exit status of cat (0 on success)
#######################################
display_usage() {
# A quoted here-document delimiter keeps the text literal: the embedded
# double quotes around "-1 0 1" are printed as written, and nothing in the
# help text is subject to expansion or backslash interpretation.
cat <<'USAGE_EOF'
USAGE: tesstrain.sh
--fontlist FONTS # A list of fontnames to train on.
--fonts_dir FONTS_PATH # Path to font files.
--lang LANG_CODE # ISO 639 code.
--langdata_dir DATADIR # Path to tesseract/training/langdata directory.
--output_dir OUTPUTDIR # Location of output traineddata file.
--save_box_tiff # Save box/tiff pairs along with lstmf files.
--overwrite # Safe to overwrite files in output_dir.
--linedata_only # Only generate training data for lstmtraining.
--run_shape_clustering # Run shape clustering (use for Indic langs).
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
OPTIONAL flags for input data. If unspecified we will look for them in
the langdata_dir directory.
--training_text TEXTFILE # Text to render and use for training.
--wordlist WORDFILE # Word list for the language ordered by
# decreasing frequency.
OPTIONAL flag to specify location of existing traineddata files, required
during feature extraction. If unspecified will use TESSDATA_PREFIX defined in
the current environment.
--tessdata_dir TESSDATADIR # Path to tesseract/tessdata directory.
NOTE:
The font names specified in --fontlist need to be recognizable by Pango using
fontconfig. An easy way to list the canonical names of all fonts available on
your system is to run text2image with --list_available_fonts and the
appropriate --fonts_dir path.
USAGE_EOF
}

# Load the helper script that lives next to this one. "$0" is quoted so the
# lookup still works when the script path contains spaces or glob characters.
# NOTE(review): parse_flags is presumably defined in tesstrain_utils.sh — confirm.
source "$(dirname "$0")/tesstrain_utils.sh"

# An explicit help request prints the usage text and exits successfully.
if [[ "$1" == "--help" || "$1" == "-h" ]]; then
display_usage
exit 0
fi

# Running with no arguments at all is treated as an error: show the usage
# text and exit with a non-zero status.
if [[ $# -eq 0 ]]; then
display_usage
exit 1
fi

# Keep a copy of the original arguments in ARGV before parse_flags runs.
# NOTE(review): parse_flags takes no arguments here, so it presumably reads
# ARGV — confirm against its definition.
ARGV=("$@")
parse_flags
Expand Down

0 comments on commit 7dbf5a0

Please sign in to comment.