Skip to content

Commit

Permalink
add --x_size as parameter for tesstrain (forwarded as --xsize when generating font images)
Browse files Browse the repository at this point in the history
  • Loading branch information
Shreeshrii committed Jan 27, 2019
1 parent 1ac76d8 commit 4d9bc11
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
12 changes: 6 additions & 6 deletions src/training/tesstrain.sh
Expand Up @@ -17,28 +17,28 @@

display_usage() {
echo -e "USAGE: tesstrain.sh
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
--fontlist FONTS # A list of fontnames to train on.
--fonts_dir FONTS_PATH # Path to font files.
--lang LANG_CODE # ISO 639 code.
--langdata_dir DATADIR # Path to tesseract/training/langdata directory.
--linedata_only # Only generate training data for lstmtraining.
--output_dir OUTPUTDIR # Location of output traineddata file.
--save_box_tiff # Save box/tiff pairs along with lstmf files.
--overwrite # Safe to overwrite files in output_dir.
--linedata_only # Only generate training data for lstmtraining.
--run_shape_clustering # Run shape clustering (use for Indic langs).
--exposures EXPOSURES # A list of exposure levels to use (e.g. "-1 0 1").
--maxpages # Specify maximum pages to output (default:0=all)
--save_box_tiff # Save box/tiff pairs along with lstmf files.
--x_size # Specify width of output image (default:3600)
OPTIONAL flags for input data. If unspecified we will look for them in
the langdata_dir directory.
--training_text TEXTFILE # Text to render and use for training.
--wordlist WORDFILE # Word list for the language ordered by
# decreasing frequency.
OPTIONAL flag to specify location of existing traineddata files, required
during feature extraction. If unspecified will use TESSDATA_PREFIX defined in
the current environment.
--tessdata_dir TESSDATADIR # Path to tesseract/tessdata directory.
NOTE:
The font names specified in --fontlist need to be recognizable by Pango using
fontconfig. An easy way to list the canonical names of all fonts available on
Expand Down
6 changes: 5 additions & 1 deletion src/training/tesstrain_utils.sh
Expand Up @@ -33,6 +33,7 @@ else
FONTS_DIR="/usr/share/fonts/"
fi

# Default settings for rendering and output.

# Forwarded as --xsize=${X_SIZE} when font images are generated; the usage
# text describes it as the width of the output image. The --x_size flag
# hands a user-supplied value to parse_value "X_SIZE" (presumably
# overriding this default -- confirm against parse_value).
X_SIZE='3600'

# Forwarded as --max_pages=${MAX_PAGES}; per the usage text, 0 means "all".
MAX_PAGES='0'

# Associated with --save_box_tiff ("Save box/tiff pairs along with lstmf
# files" in the usage text); 0 is presumably "disabled" -- TODO confirm.
SAVE_BOX_TIFF='0'

# Associated with --output_dir, described in the usage text as the
# location of the output traineddata file.
OUTPUT_DIR='/tmp/tesstrain/tessdata'
Expand Down Expand Up @@ -185,6 +186,9 @@ parse_flags() {
FONT_CONFIG_CACHE=$WORKSPACE_DIR/fc-cache
mkdir -p $FONT_CONFIG_CACHE
i=$j ;;
--x_size)
parse_value "X_SIZE" ${ARGV[$j]:-}
i=$j ;;
*)
err_exit "Unrecognized argument ${ARGV[$i]}" ;;
esac
Expand Down Expand Up @@ -246,7 +250,7 @@ generate_font_image() {

local common_args="--fontconfig_tmpdir=${FONT_CONFIG_CACHE}"
common_args+=" --fonts_dir=${FONTS_DIR} --strip_unrenderable_words"
common_args+=" --leading=${LEADING}"
common_args+=" --leading=${LEADING} --xsize=${X_SIZE}"
common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}"
common_args+=" --outputbase=${outbase} --max_pages=${MAX_PAGES}"

Expand Down

0 comments on commit 4d9bc11

Please sign in to comment.