From 29f2cff203a8658cd5a601d06adf38945e7805e0 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 26 May 2019 08:53:02 +0200 Subject: [PATCH] training: Add missing static attributes That fixes several warnings from clang++ like the following one: src/training/combine_lang_model.cpp:36:1: warning: no previous extern declaration for non-static variable 'FLAGS_lang_is_rtl' [-Wmissing-variable-declarations] While at it, remove the outdated "Created:" lines from the file headers and move the disabled flags in text2image.cpp into an #if 0 block instead of commenting them out. Signed-off-by: Stefan Weil --- src/training/combine_lang_model.cpp | 32 ++-- src/training/commontraining.cpp | 28 ++-- src/training/lstmeval.cpp | 19 ++- src/training/lstmtraining.cpp | 71 +++++---- src/training/mergenf.cpp | 12 +- src/training/set_unicharset_properties.cpp | 4 +- src/training/shapeclustering.cpp | 12 +- src/training/text2image.cpp | 172 +++++++++++---------- src/training/unicharset_extractor.cpp | 9 +- 9 files changed, 179 insertions(+), 180 deletions(-) diff --git a/src/training/combine_lang_model.cpp b/src/training/combine_lang_model.cpp index 2969b5b3ad..88d6664f51 100644 --- a/src/training/combine_lang_model.cpp +++ b/src/training/combine_lang_model.cpp @@ -22,22 +22,22 @@ #include "tprintf.h" #include "unicharset_training_utils.h" -STRING_PARAM_FLAG(input_unicharset, "", - "Filename with unicharset to complete and use in encoding"); -STRING_PARAM_FLAG(script_dir, "", - "Directory name for input script unicharsets"); -STRING_PARAM_FLAG(words, "", - "File listing words to use for the system dictionary"); -STRING_PARAM_FLAG(puncs, "", "File listing punctuation patterns"); -STRING_PARAM_FLAG(numbers, "", "File listing number patterns"); -STRING_PARAM_FLAG(output_dir, "", "Root directory for output files"); -STRING_PARAM_FLAG(version_str, "", "Version string to add to traineddata file"); -STRING_PARAM_FLAG(lang, "", "Name of language being processed"); -BOOL_PARAM_FLAG(lang_is_rtl, false, - "True if lang being processed is written right-to-left"); -BOOL_PARAM_FLAG(pass_through_recoder, false, - "If true, the recoder is a simple pass-through of 
the" - " unicharset. Otherwise, potentially a compression of it"); +static STRING_PARAM_FLAG(input_unicharset, "", + "Filename with unicharset to complete and use in encoding"); +static STRING_PARAM_FLAG(script_dir, "", + "Directory name for input script unicharsets"); +static STRING_PARAM_FLAG(words, "", + "File listing words to use for the system dictionary"); +static STRING_PARAM_FLAG(puncs, "", "File listing punctuation patterns"); +static STRING_PARAM_FLAG(numbers, "", "File listing number patterns"); +static STRING_PARAM_FLAG(output_dir, "", "Root directory for output files"); +static STRING_PARAM_FLAG(version_str, "", "Version string to add to traineddata file"); +static STRING_PARAM_FLAG(lang, "", "Name of language being processed"); +static BOOL_PARAM_FLAG(lang_is_rtl, false, + "True if lang being processed is written right-to-left"); +static BOOL_PARAM_FLAG(pass_through_recoder, false, + "If true, the recoder is a simple pass-through of the " + "unicharset. Otherwise, potentially a compression of it"); int main(int argc, char** argv) { // Sets properties on the input unicharset file, and writes: diff --git a/src/training/commontraining.cpp b/src/training/commontraining.cpp index 0e7747a330..d44f903458 100644 --- a/src/training/commontraining.cpp +++ b/src/training/commontraining.cpp @@ -87,27 +87,27 @@ using tesseract::ShapeTable; // -M 0.625 -B 0.05 -I 1.0 -C 1e-6. 
CLUSTERCONFIG Config = { elliptical, 0.625, 0.05, 1.0, 1e-6, 0 }; FEATURE_DEFS_STRUCT feature_defs; -CCUtil ccutil; +static CCUtil ccutil; INT_PARAM_FLAG(debug_level, 0, "Level of Trainer debugging"); -INT_PARAM_FLAG(load_images, 0, "Load images with tr files"); -STRING_PARAM_FLAG(configfile, "", "File to load more configs from"); +static INT_PARAM_FLAG(load_images, 0, "Load images with tr files"); +static STRING_PARAM_FLAG(configfile, "", "File to load more configs from"); STRING_PARAM_FLAG(D, "", "Directory to write output files to"); -STRING_PARAM_FLAG(F, "font_properties", "File listing font properties"); +static STRING_PARAM_FLAG(F, "font_properties", "File listing font properties"); STRING_PARAM_FLAG(X, "", "File listing font xheights"); STRING_PARAM_FLAG(U, "unicharset", "File to load unicharset from"); STRING_PARAM_FLAG(O, "", "File to write unicharset to"); -STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); +static STRING_PARAM_FLAG(output_trainer, "", "File to write trainer to"); STRING_PARAM_FLAG(test_ch, "", "UTF8 test character string"); -DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples, - "Min number of samples per proto as % of total"); -DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal, - "Max percentage of samples in a cluster which have more" - " than 1 feature in that cluster"); -DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence, - "Desired independence between dimensions"); -DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence, - "Desired confidence in prototypes created"); +static DOUBLE_PARAM_FLAG(clusterconfig_min_samples_fraction, Config.MinSamples, + "Min number of samples per proto as % of total"); +static DOUBLE_PARAM_FLAG(clusterconfig_max_illegal, Config.MaxIllegal, + "Max percentage of samples in a cluster which have more" + " than 1 feature in that cluster"); +static DOUBLE_PARAM_FLAG(clusterconfig_independence, Config.Independence, + "Desired independence 
between dimensions"); +static DOUBLE_PARAM_FLAG(clusterconfig_confidence, Config.Confidence, + "Desired confidence in prototypes created"); /** * This routine parses the command line arguments that were diff --git a/src/training/lstmeval.cpp b/src/training/lstmeval.cpp index 3492a14c92..988755e22d 100644 --- a/src/training/lstmeval.cpp +++ b/src/training/lstmeval.cpp @@ -2,7 +2,6 @@ // File: lstmeval.cpp // Description: Evaluation program for LSTM-based networks. // Author: Ray Smith -// Created: Wed Nov 23 12:20:06 PST 2016 // // (C) Copyright 2016, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); @@ -25,15 +24,15 @@ #include "strngs.h" #include "tprintf.h" -STRING_PARAM_FLAG(model, "", "Name of model file (training or recognition)"); -STRING_PARAM_FLAG(traineddata, "", - "If model is a training checkpoint, then traineddata must " - "be the traineddata file that was given to the trainer"); -STRING_PARAM_FLAG(eval_listfile, "", - "File listing sample files in lstmf training format."); -INT_PARAM_FLAG(max_image_MB, 2000, "Max memory to use for images."); -INT_PARAM_FLAG(verbosity, 1, - "Amount of diagnosting information to output (0-2)."); +static STRING_PARAM_FLAG(model, "", "Name of model file (training or recognition)"); +static STRING_PARAM_FLAG(traineddata, "", + "If model is a training checkpoint, then traineddata must " + "be the traineddata file that was given to the trainer"); +static STRING_PARAM_FLAG(eval_listfile, "", + "File listing sample files in lstmf training format."); +static INT_PARAM_FLAG(max_image_MB, 2000, "Max memory to use for images."); +static INT_PARAM_FLAG(verbosity, 1, + "Amount of diagnosting information to output (0-2)."); int main(int argc, char **argv) { tesseract::CheckSharedLibraryVersion(); diff --git a/src/training/lstmtraining.cpp b/src/training/lstmtraining.cpp index d3eb7747c9..b6644259c8 100644 --- a/src/training/lstmtraining.cpp +++ b/src/training/lstmtraining.cpp @@ -2,7 +2,6 @@ // File: 
lstmtraining.cpp // Description: Training program for LSTM-based networks. // Author: Ray Smith -// Created: Fri May 03 11:05:06 PST 2013 // // (C) Copyright 2013, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); @@ -28,41 +27,41 @@ #include "tprintf.h" #include "unicharset_training_utils.h" -INT_PARAM_FLAG(debug_interval, 0, "How often to display the alignment."); -STRING_PARAM_FLAG(net_spec, "", "Network specification"); -INT_PARAM_FLAG(net_mode, 192, "Controls network behavior."); -INT_PARAM_FLAG(perfect_sample_delay, 0, - "How many imperfect samples between perfect ones."); -DOUBLE_PARAM_FLAG(target_error_rate, 0.01, "Final error rate in percent."); -DOUBLE_PARAM_FLAG(weight_range, 0.1, "Range of initial random weights."); -DOUBLE_PARAM_FLAG(learning_rate, 10.0e-4, "Weight factor for new deltas."); -DOUBLE_PARAM_FLAG(momentum, 0.5, "Decay factor for repeating deltas."); -DOUBLE_PARAM_FLAG(adam_beta, 0.999, "Decay factor for repeating deltas."); -INT_PARAM_FLAG(max_image_MB, 6000, "Max memory to use for images."); -STRING_PARAM_FLAG(continue_from, "", "Existing model to extend"); -STRING_PARAM_FLAG(model_output, "lstmtrain", "Basename for output models"); -STRING_PARAM_FLAG(train_listfile, "", - "File listing training files in lstmf training format."); -STRING_PARAM_FLAG(eval_listfile, "", - "File listing eval files in lstmf training format."); -BOOL_PARAM_FLAG(stop_training, false, - "Just convert the training model to a runtime model."); -BOOL_PARAM_FLAG(convert_to_int, false, - "Convert the recognition model to an integer model."); -BOOL_PARAM_FLAG(sequential_training, false, - "Use the training files sequentially instead of round-robin."); -INT_PARAM_FLAG(append_index, -1, "Index in continue_from Network at which to" - " attach the new network defined by net_spec"); -BOOL_PARAM_FLAG(debug_network, false, - "Get info on distribution of weight values"); -INT_PARAM_FLAG(max_iterations, 0, "If set, exit after this many iterations"); 
-STRING_PARAM_FLAG(traineddata, "", - "Combined Dawgs/Unicharset/Recoder for language model"); -STRING_PARAM_FLAG(old_traineddata, "", - "When changing the character set, this specifies the old" - " character set that is to be replaced"); -BOOL_PARAM_FLAG(randomly_rotate, false, - "Train OSD and randomly turn training samples upside-down"); +static INT_PARAM_FLAG(debug_interval, 0, "How often to display the alignment."); +static STRING_PARAM_FLAG(net_spec, "", "Network specification"); +static INT_PARAM_FLAG(net_mode, 192, "Controls network behavior."); +static INT_PARAM_FLAG(perfect_sample_delay, 0, + "How many imperfect samples between perfect ones."); +static DOUBLE_PARAM_FLAG(target_error_rate, 0.01, "Final error rate in percent."); +static DOUBLE_PARAM_FLAG(weight_range, 0.1, "Range of initial random weights."); +static DOUBLE_PARAM_FLAG(learning_rate, 10.0e-4, "Weight factor for new deltas."); +static DOUBLE_PARAM_FLAG(momentum, 0.5, "Decay factor for repeating deltas."); +static DOUBLE_PARAM_FLAG(adam_beta, 0.999, "Decay factor for repeating deltas."); +static INT_PARAM_FLAG(max_image_MB, 6000, "Max memory to use for images."); +static STRING_PARAM_FLAG(continue_from, "", "Existing model to extend"); +static STRING_PARAM_FLAG(model_output, "lstmtrain", "Basename for output models"); +static STRING_PARAM_FLAG(train_listfile, "", + "File listing training files in lstmf training format."); +static STRING_PARAM_FLAG(eval_listfile, "", + "File listing eval files in lstmf training format."); +static BOOL_PARAM_FLAG(stop_training, false, + "Just convert the training model to a runtime model."); +static BOOL_PARAM_FLAG(convert_to_int, false, + "Convert the recognition model to an integer model."); +static BOOL_PARAM_FLAG(sequential_training, false, + "Use the training files sequentially instead of round-robin."); +static INT_PARAM_FLAG(append_index, -1, "Index in continue_from Network at which to" + " attach the new network defined by net_spec"); +static 
BOOL_PARAM_FLAG(debug_network, false, + "Get info on distribution of weight values"); +static INT_PARAM_FLAG(max_iterations, 0, "If set, exit after this many iterations"); +static STRING_PARAM_FLAG(traineddata, "", + "Combined Dawgs/Unicharset/Recoder for language model"); +static STRING_PARAM_FLAG(old_traineddata, "", + "When changing the character set, this specifies the old" + " character set that is to be replaced"); +static BOOL_PARAM_FLAG(randomly_rotate, false, + "Train OSD and randomly turn training samples upside-down"); // Number of training images to train between calls to MaintainCheckpoints. const int kNumPagesPerBatch = 100; diff --git a/src/training/mergenf.cpp b/src/training/mergenf.cpp index 59106bd076..bc2db17476 100644 --- a/src/training/mergenf.cpp +++ b/src/training/mergenf.cpp @@ -33,18 +33,18 @@ #include "params.h" /*-------------------once in subfeat---------------------------------*/ -double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ..."); +static double_VAR(training_angle_match_scale, 1.0, "Angle Match Scale ..."); -double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ..."); +static double_VAR(training_similarity_midpoint, 0.0075, "Similarity Midpoint ..."); -double_VAR(training_similarity_curl, 2.0, "Similarity Curl ..."); +static double_VAR(training_similarity_curl, 2.0, "Similarity Curl ..."); /*-----------------------------once in fasttrain----------------------------------*/ -double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ..."); +static double_VAR(training_tangent_bbox_pad, 0.5, "Tangent bounding box pad ..."); -double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ..."); +static double_VAR(training_orthogonal_bbox_pad, 2.5, "Orthogonal bounding box pad ..."); -double_VAR(training_angle_pad, 45.0, "Angle pad ..."); +static double_VAR(training_angle_pad, 45.0, "Angle pad ..."); /** * Compare protos p1 and p2 and return an estimate of the diff --git 
a/src/training/set_unicharset_properties.cpp b/src/training/set_unicharset_properties.cpp index a402d9f196..866ac39329 100644 --- a/src/training/set_unicharset_properties.cpp +++ b/src/training/set_unicharset_properties.cpp @@ -18,8 +18,8 @@ #include "unicharset_training_utils.h" // The directory that is searched for universal script unicharsets. -STRING_PARAM_FLAG(script_dir, "", - "Directory name for input script unicharsets/xheights"); +static STRING_PARAM_FLAG(script_dir, "", + "Directory name for input script unicharsets/xheights"); int main(int argc, char** argv) { tesseract::CheckSharedLibraryVersion(); diff --git a/src/training/shapeclustering.cpp b/src/training/shapeclustering.cpp index 8567a3daa2..fe5452d704 100644 --- a/src/training/shapeclustering.cpp +++ b/src/training/shapeclustering.cpp @@ -28,12 +28,12 @@ #include "params.h" #include "strngs.h" -INT_PARAM_FLAG(display_cloud_font, -1, - "Display cloud of this font, canonical_class1"); -INT_PARAM_FLAG(display_canonical_font, -1, - "Display canonical sample of this font, canonical_class2"); -STRING_PARAM_FLAG(canonical_class1, "", "Class to show ambigs for"); -STRING_PARAM_FLAG(canonical_class2, "", "Class to show ambigs for"); +static INT_PARAM_FLAG(display_cloud_font, -1, + "Display cloud of this font, canonical_class1"); +static INT_PARAM_FLAG(display_canonical_font, -1, + "Display canonical sample of this font, canonical_class2"); +static STRING_PARAM_FLAG(canonical_class1, "", "Class to show ambigs for"); +static STRING_PARAM_FLAG(canonical_class2, "", "Class to show ambigs for"); // Loads training data, if requested displays debug information, otherwise // creates the master shape table by shape clustering and writes it to a file. diff --git a/src/training/text2image.cpp b/src/training/text2image.cpp index 12f289ea4e..ab0b3f733a 100644 --- a/src/training/text2image.cpp +++ b/src/training/text2image.cpp @@ -11,9 +11,7 @@ * the appropriate --fonts_dir path. 
* Specifying --use_only_legacy_fonts will restrict the available * fonts to those listed in legacy_fonts.h - * * Authors: Ranjith Unnikrishnan, Ray Smith - * Created: Tue Nov 19 2013 * * (C) Copyright 2013, Google Inc. * Licensed under the Apache License, Version 2.0 (the "License"); @@ -58,140 +56,144 @@ const int kRandomSeed = 0x18273645; // The text input file. -STRING_PARAM_FLAG(text, "", "File name of text input to process"); +static STRING_PARAM_FLAG(text, "", "File name of text input to process"); // The text output file. -STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file"); +static STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file"); // Degrade the rendered image to mimic scanner quality. -BOOL_PARAM_FLAG(degrade_image, true, - "Degrade rendered image with speckle noise, dilation/erosion " - "and rotation"); +static BOOL_PARAM_FLAG(degrade_image, true, + "Degrade rendered image with speckle noise, dilation/erosion " + "and rotation"); // Rotate the rendered image to have more realistic glyph borders -BOOL_PARAM_FLAG(rotate_image, true, "Rotate the image in a random way."); +static BOOL_PARAM_FLAG(rotate_image, true, "Rotate the image in a random way."); // Degradation to apply to the image. -INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier"); +static INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier"); // Distort the rendered image by various means according to the bool flags. -BOOL_PARAM_FLAG(distort_image, false, - "Degrade rendered image with noise, blur, invert."); +static BOOL_PARAM_FLAG(distort_image, false, + "Degrade rendered image with noise, blur, invert."); // Distortion to apply to the image. -BOOL_PARAM_FLAG(invert, true, "Invert the image"); +static BOOL_PARAM_FLAG(invert, true, "Invert the image"); // Distortion to apply to the image. 
-BOOL_PARAM_FLAG(white_noise, true, "Add Gaussian Noise"); +static BOOL_PARAM_FLAG(white_noise, true, "Add Gaussian Noise"); // Distortion to apply to the image. -BOOL_PARAM_FLAG(smooth_noise, true, "Smoothen Noise"); +static BOOL_PARAM_FLAG(smooth_noise, true, "Smoothen Noise"); // Distortion to apply to the image. -BOOL_PARAM_FLAG(blur, true, "Blur the image"); +static BOOL_PARAM_FLAG(blur, true, "Blur the image"); + +#if 0 // Distortion to apply to the image. -//BOOL_PARAM_FLAG(perspective, false, "Generate Perspective Distortion"); +static BOOL_PARAM_FLAG(perspective, false, "Generate Perspective Distortion"); // Distortion to apply to the image. -//INT_PARAM_FLAG(box_reduction, 0, "Integer reduction factor box_scale"); +static INT_PARAM_FLAG(box_reduction, 0, "Integer reduction factor box_scale"); + +#endif // Output image resolution. -INT_PARAM_FLAG(resolution, 300, "Pixels per inch"); +static INT_PARAM_FLAG(resolution, 300, "Pixels per inch"); // Width of output image (in pixels). -INT_PARAM_FLAG(xsize, 3600, "Width of output image"); +static INT_PARAM_FLAG(xsize, 3600, "Width of output image"); // Max height of output image (in pixels). -INT_PARAM_FLAG(ysize, 4800, "Height of output image"); +static INT_PARAM_FLAG(ysize, 4800, "Height of output image"); // Max number of pages to produce. -INT_PARAM_FLAG(max_pages, 0, "Maximum number of pages to output (0=unlimited)"); +static INT_PARAM_FLAG(max_pages, 0, "Maximum number of pages to output (0=unlimited)"); // Margin around text (in pixels). -INT_PARAM_FLAG(margin, 100, "Margin round edges of image"); +static INT_PARAM_FLAG(margin, 100, "Margin round edges of image"); // Size of text (in points). -INT_PARAM_FLAG(ptsize, 12, "Size of printed text"); +static INT_PARAM_FLAG(ptsize, 12, "Size of printed text"); // Inter-character space (in ems). 
-DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems"); +static DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems"); // Sets the probability (value in [0, 1]) of starting to render a word with an // underline. Words are assumed to be space-delimited. -DOUBLE_PARAM_FLAG(underline_start_prob, 0, - "Fraction of words to underline (value in [0,1])"); +static DOUBLE_PARAM_FLAG(underline_start_prob, 0, + "Fraction of words to underline (value in [0,1])"); // Set the probability (value in [0, 1]) of continuing a started underline to // the next word. -DOUBLE_PARAM_FLAG(underline_continuation_prob, 0, - "Fraction of words to underline (value in [0,1])"); +static DOUBLE_PARAM_FLAG(underline_continuation_prob, 0, + "Fraction of words to underline (value in [0,1])"); // Inter-line space (in pixels). -INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)"); +static INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)"); // Layout and glyph orientation on rendering. -STRING_PARAM_FLAG(writing_mode, "horizontal", - "Specify one of the following writing" - " modes.\n" - "'horizontal' : Render regular horizontal text. (default)\n" - "'vertical' : Render vertical text. Glyph orientation is" - " selected by Pango.\n" - "'vertical-upright' : Render vertical text. Glyph " - " orientation is set to be upright."); +static STRING_PARAM_FLAG(writing_mode, "horizontal", + "Specify one of the following writing" + " modes.\n" + "'horizontal' : Render regular horizontal text. (default)\n" + "'vertical' : Render vertical text. Glyph orientation is" + " selected by Pango.\n" + "'vertical-upright' : Render vertical text. 
Glyph " + " orientation is set to be upright."); -INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes"); +static INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes"); -BOOL_PARAM_FLAG(strip_unrenderable_words, true, - "Remove unrenderable words from source text"); +static BOOL_PARAM_FLAG(strip_unrenderable_words, true, + "Remove unrenderable words from source text"); // Font name. -STRING_PARAM_FLAG(font, "Arial", "Font description name to use"); - -BOOL_PARAM_FLAG(ligatures, false, - "Rebuild and render ligatures"); - -BOOL_PARAM_FLAG(find_fonts, false, - "Search for all fonts that can render the text"); -BOOL_PARAM_FLAG(render_per_font, true, - "If find_fonts==true, render each font to its own image. " - "Image filenames are of the form output_name.font_name.tif"); -DOUBLE_PARAM_FLAG(min_coverage, 1.0, - "If find_fonts==true, the minimum coverage the font has of " - "the characters in the text file to include it, between " - "0 and 1."); - -BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit."); - -BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the" - " input file into one bounding box. The ngrams in the input" - " file will be randomly permuted before rendering (so that" - " there is sufficient variety of characters on each line)."); - -BOOL_PARAM_FLAG(output_word_boxes, false, - "Output word bounding boxes instead of character boxes. " - "This is used for Cube training, and implied by " - "--render_ngrams."); - -STRING_PARAM_FLAG(unicharset_file, "", - "File with characters in the unicharset. If --render_ngrams" - " is true and --unicharset_file is specified, ngrams with" - " characters that are not in unicharset will be omitted"); - -BOOL_PARAM_FLAG(bidirectional_rotation, false, - "Rotate the generated characters both ways."); - -BOOL_PARAM_FLAG(only_extract_font_properties, false, - "Assumes that the input file contains a list of ngrams. 
Renders" - " each ngram, extracts spacing properties and records them in" - " output_base/[font_name].fontinfo file."); +static STRING_PARAM_FLAG(font, "Arial", "Font description name to use"); + +static BOOL_PARAM_FLAG(ligatures, false, + "Rebuild and render ligatures"); + +static BOOL_PARAM_FLAG(find_fonts, false, + "Search for all fonts that can render the text"); +static BOOL_PARAM_FLAG(render_per_font, true, + "If find_fonts==true, render each font to its own image. " + "Image filenames are of the form output_name.font_name.tif"); +static DOUBLE_PARAM_FLAG(min_coverage, 1.0, + "If find_fonts==true, the minimum coverage the font has of " + "the characters in the text file to include it, between " + "0 and 1."); + +static BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit."); + +static BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the" + " input file into one bounding box. The ngrams in the input" + " file will be randomly permuted before rendering (so that" + " there is sufficient variety of characters on each line)."); + +static BOOL_PARAM_FLAG(output_word_boxes, false, + "Output word bounding boxes instead of character boxes. " + "This is used for Cube training, and implied by " + "--render_ngrams."); + +static STRING_PARAM_FLAG(unicharset_file, "", + "File with characters in the unicharset. If --render_ngrams" + " is true and --unicharset_file is specified, ngrams with" + " characters that are not in unicharset will be omitted"); + +static BOOL_PARAM_FLAG(bidirectional_rotation, false, + "Rotate the generated characters both ways."); + +static BOOL_PARAM_FLAG(only_extract_font_properties, false, + "Assumes that the input file contains a list of ngrams. 
Renders" + " each ngram, extracts spacing properties and records them in" + " output_base/[font_name].fontinfo file."); // Use these flags to output zero-padded, square individual character images -BOOL_PARAM_FLAG(output_individual_glyph_images, false, - "If true also outputs individual character images"); -INT_PARAM_FLAG(glyph_resized_size, 0, - "Each glyph is square with this side length in pixels"); -INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0, - "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"); +static BOOL_PARAM_FLAG(output_individual_glyph_images, false, + "If true also outputs individual character images"); +static INT_PARAM_FLAG(glyph_resized_size, 0, + "Each glyph is square with this side length in pixels"); +static INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0, + "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad"); namespace tesseract { diff --git a/src/training/unicharset_extractor.cpp b/src/training/unicharset_extractor.cpp index 64b93717a0..166bc9475d 100644 --- a/src/training/unicharset_extractor.cpp +++ b/src/training/unicharset_extractor.cpp @@ -2,7 +2,6 @@ // File: unicharset_extractor.cpp // Description: Unicode character/ligature set extractor. // Author: Thomas Kielbus -// Created: Wed Jun 28 17:05:01 PDT 2006 // // (C) Copyright 2006, Google Inc. // Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,10 +31,10 @@ #include "unicharset.h" #include "unicharset_training_utils.h" -STRING_PARAM_FLAG(output_unicharset, "unicharset", "Output file path"); -INT_PARAM_FLAG(norm_mode, 1, - "Normalization mode: 1=Combine graphemes, " - "2=Split graphemes, 3=Pure unicode"); +static STRING_PARAM_FLAG(output_unicharset, "unicharset", "Output file path"); +static INT_PARAM_FLAG(norm_mode, 1, + "Normalization mode: 1=Combine graphemes, " + "2=Split graphemes, 3=Pure unicode"); namespace tesseract {