From 5d21ecfad3eddd820aac6b585ce6fe01f357e403 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Mon, 7 Nov 2016 11:56:07 -0800 Subject: [PATCH] Rendering/hash map changes part 2 --- training/boxchar.cpp | 12 ++++++++--- training/boxchar.h | 3 +++ training/cntraining.cpp | 40 +++++++++++++++++++----------------- training/pango_font_info.cpp | 2 +- 4 files changed, 34 insertions(+), 23 deletions(-) diff --git a/training/boxchar.cpp b/training/boxchar.cpp index b99c12a600..4324597744 100644 --- a/training/boxchar.cpp +++ b/training/boxchar.cpp @@ -73,7 +73,6 @@ void BoxChar::PrepareToWrite(vector* boxes) { if (rtl_rules) { ReorderRTLText(boxes); } - tprintf("Rtl = %d ,vertical=%d\n", rtl_rules, vertical_rules); } // Inserts newline (tab) characters into the vector at newline positions. @@ -291,13 +290,19 @@ const int kMaxLineLength = 1024; /* static */ void BoxChar::WriteTesseractBoxFile(const string& filename, int height, const vector& boxes) { + string output = GetTesseractBoxStr(height, boxes); + File::WriteStringToFileOrDie(output, filename); +} + +/* static */ +string BoxChar::GetTesseractBoxStr(int height, const vector& boxes) { string output; char buffer[kMaxLineLength]; for (int i = 0; i < boxes.size(); ++i) { const Box* box = boxes[i]->box_; if (box == NULL) { tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n"); - return; + return ""; } int nbytes = snprintf(buffer, kMaxLineLength, "%s %d %d %d %d %d\n", @@ -305,6 +310,7 @@ void BoxChar::WriteTesseractBoxFile(const string& filename, int height, box->x + box->w, height - box->y, boxes[i]->page_); output.append(buffer, nbytes); } - File::WriteStringToFileOrDie(output, filename); + return output; } + } // namespace tesseract diff --git a/training/boxchar.h b/training/boxchar.h index 27b568a143..3748c4abad 100644 --- a/training/boxchar.h +++ b/training/boxchar.h @@ -100,6 +100,9 @@ class BoxChar { // is needed to convert to tesseract coordinates. static void WriteTesseractBoxFile(const string& name, int height, const vector& boxes); + // Gets the tesseract box file as a string from the vector of boxes. + // The image height is needed to convert to tesseract coordinates. + static string GetTesseractBoxStr(int height, const vector& boxes); private: string ch_; diff --git a/training/cntraining.cpp b/training/cntraining.cpp index 1a79865554..6f4f42aebe 100644 --- a/training/cntraining.cpp +++ b/training/cntraining.cpp @@ -160,13 +160,18 @@ int main(int argc, char *argv[]) { // reduce the min samples: // Config.MinSamples = 0.5 / num_fonts; pCharList = CharList; + // The norm protos will count the source protos, so we keep them here in + // freeable_protos, so they can be freed later. + GenericVector freeable_protos; iterate(pCharList) { //Cluster - if (Clusterer) - FreeClusterer(Clusterer); CharSample = (LABELEDLIST)first_node(pCharList); Clusterer = SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); + if (Clusterer == NULL) { // To avoid a SIGSEGV + fprintf(stderr, "Error: NULL clusterer!\n"); + return 1; + } float SavedMinSamples = Config.MinSamples; // To disable the tendency to produce a single cluster for all fonts, // make MagicSamples an impossible to achieve number: @@ -185,21 +190,21 @@ int main(int argc, char *argv[]) { } Config.MinSamples = SavedMinSamples; AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); + freeable_protos.push_back(ProtoList); + FreeClusterer(Clusterer); } FreeTrainingSamples(CharList); - if (Clusterer == NULL) { // To avoid a SIGSEGV - fprintf(stderr, "Error: NULL clusterer!\n"); - return 1; - } - WriteNormProtos(FLAGS_D.c_str(), NormProtoList, Clusterer); + int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE); + WriteNormProtos(FLAGS_D.c_str(), NormProtoList, + FeatureDefs.FeatureDesc[desc_index]); FreeNormProtoList(NormProtoList); - FreeProtoList(&ProtoList); - FreeClusterer(Clusterer); + for (int i = 0; i < freeable_protos.size(); ++i) { + FreeProtoList(&freeable_protos[i]); + } printf ("\n"); return 0; } // main - /*---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------*/ @@ -211,16 +216,13 @@ int main(int argc, char *argv[]) { * of the samples. * @param Directory directory to place sample files into * @param LabeledProtoList List of labeled protos -* @param Clusterer The CLUSTERER to use +* @param feature_desc Description of the features * @return none * @note Exceptions: none * @note History: Fri Aug 18 16:17:06 1989, DSJ, Created. */ -void WriteNormProtos ( - const char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer) -{ +void WriteNormProtos(const char *Directory, LIST LabeledProtoList, + const FEATURE_DESC_STRUCT *feature_desc) { FILE *File; STRING Filename; LABELEDLIST LabeledProto; @@ -235,8 +237,8 @@ void WriteNormProtos ( Filename += "normproto"; printf ("\nWriting %s ...", Filename.string()); File = Efopen (Filename.string(), "wb"); - fprintf(File,"%0d\n",Clusterer->SampleSize); - WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc); + fprintf(File, "%0d\n", feature_desc->NumParams); + WriteParamDesc(File, feature_desc->NumParams, feature_desc->ParamDesc); iterate(LabeledProtoList) { LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); @@ -251,7 +253,7 @@ void WriteNormProtos ( exit(1); } fprintf(File, "\n%s %d\n", LabeledProto->Label, N); - WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, true, false); + WriteProtos(File, feature_desc->NumParams, LabeledProto->List, true, false); } fclose (File); diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp index 9a4d44ff2c..2a26d70035 100644 --- a/training/pango_font_info.cpp +++ b/training/pango_font_info.cpp @@ -127,7 +127,7 @@ string PangoFontInfo::DescriptionName() const { /* static */ void PangoFontInfo::SoftInitFontConfig() { if (fonts_dir_.empty()) { - HardInitFontConfig(FLAGS_fonts_dir, FLAGS_fontconfig_tmpdir); + HardInitFontConfig(FLAGS_fonts_dir.c_str(), FLAGS_fontconfig_tmpdir.c_str()); } }