diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp index 9f2822e4ac..7b5ab3e869 100644 --- a/src/api/baseapi.cpp +++ b/src/api/baseapi.cpp @@ -1560,8 +1560,8 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { // Now, process the word... std::vector>>* confidencemap = nullptr; - if (tesseract_->glyph_confidences) { - confidencemap = res_it->GetGlyphConfidences(); + if (tesseract_->lstm_choice_mode) { + confidencemap = res_it->GetChoices(); } hocr_str += "\n Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); if (italic) hocr_str += ""; if (bold) hocr_str += ""; - // If glyph confidence is required it is added here - if (tesseract_->glyph_confidences == 1 && confidencemap != nullptr) { + // If the lstm choice mode is required it is added here + if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) { for (size_t i = 0; i < confidencemap->size(); i++) { hocr_str += "\n > timestep = (*confidencemap)[i]; for (std::pair conf : timestep) { hocr_str += "glyph_confidences == 2 && confidencemap != nullptr) { + } else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) { for (size_t i = 0; i < confidencemap->size(); i++) { std::vector> timestep = (*confidencemap)[i]; if (timestep.size() > 0) { hocr_str += "\n RecognizeLine(*im_data, true, classify_debug_level > 0, kWorstDictCertainty / kCertaintyScale, - word_box, words, glyph_confidences); + word_box, words, lstm_choice_mode); delete im_data; SearchWords(words); } diff --git a/src/ccmain/resultiterator.cpp b/src/ccmain/resultiterator.cpp index 66381b3fd3..62a5f34548 100644 --- a/src/ccmain/resultiterator.cpp +++ b/src/ccmain/resultiterator.cpp @@ -604,7 +604,7 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const { return result; } -std::vector>>* ResultIterator::GetGlyphConfidences() const { +std::vector>>* ResultIterator::GetChoices() const { if (it_->word() != nullptr) { return &it_->word()->timesteps; } else { diff --git a/src/ccmain/resultiterator.h b/src/ccmain/resultiterator.h index 8526aed75f..8738897dab 100644 --- a/src/ccmain/resultiterator.h +++ b/src/ccmain/resultiterator.h @@ -98,9 +98,9 @@ class TESS_API ResultIterator : public LTRResultIterator { virtual char* GetUTF8Text(PageIteratorLevel level) const; /** - * Returns the glyph confidences for every LSTM timestep for the current Word + * Returns the lstm choices for every LSTM timestep for the current Word */ - virtual std::vector>>* GetGlyphConfidences() const; + virtual std::vector>>* GetChoices() const; /** * Return whether the current paragraph's dominant reading direction diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp index 3cac8dd251..96baa940ee 100644 --- a/src/ccmain/tesseractclass.cpp +++ b/src/ccmain/tesseractclass.cpp @@ -514,12 +514,12 @@ Tesseract::Tesseract() STRING_MEMBER(page_separator, "\f", "Page separator (default is form feed control character)", this->params()), - INT_MEMBER(glyph_confidences, 0, - "Allows to include glyph confidences in the hOCR output. " - "Valid input values are 0, 1 and 2. 0 is the default value. " - "With 1 the glyph confidences of all timesteps are included. " - "With 2 the glyph confidences are accumulated per charakter.", - this->params()), + INT_MEMBER(lstm_choice_mode, 0, + "Allows to include alternative symbols choices in the hOCR output. " + "Valid input values are 0, 1 and 2. 0 is the default value. " + "With 1 the alternative symbol choices per timestep are included. " + "With 2 the alternative symbol choices are accumulated per character.", + this->params()), backup_config_file_(nullptr), pix_binary_(nullptr), diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h index 71b08347a0..919e0801a7 100644 --- a/src/ccmain/tesseractclass.h +++ b/src/ccmain/tesseractclass.h @@ -1118,11 +1118,11 @@ class Tesseract : public Wordrec { "Preserve multiple interword spaces"); STRING_VAR_H(page_separator, "\f", "Page separator (default is form feed control character)"); - INT_VAR_H(glyph_confidences, 0, - "Allows to include glyph confidences in the hOCR output. " + INT_VAR_H(lstm_choice_mode, 0, + "Allows to include alternative symbols choices in the hOCR output. " "Valid input values are 0, 1 and 2. 0 is the default value. " - "With 1 the glyph confidences of all timesteps are included. " - "With 2 the glyph confidences are accumulated per charakter."); + "With 1 the alternative symbol choices per timestep are included. " + "With 2 the alternative symbol choices are accumulated per character."); //// ambigsrecog.cpp ///////////////////////////////////////////////////////// FILE *init_recog_training(const STRING &fname); diff --git a/src/ccstruct/pageres.h b/src/ccstruct/pageres.h index cb559fb179..6aea3283b9 100644 --- a/src/ccstruct/pageres.h +++ b/src/ccstruct/pageres.h @@ -220,7 +220,7 @@ class WERD_RES : public ELIST_LINK { // Gaps between blobs in chopped_word. blob_gaps[i] is the gap between // blob i and blob i+1. GenericVector blob_gaps; - // Stores the glyph confidences of every timestep of the lstm + // Stores the lstm choices of every timestep std::vector>> timesteps; // Ratings matrix contains classifier choices for each classified combination // of blobs. The dimension is the same as the number of blobs in chopped_word diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp index 62ca990051..acbc36f12b 100644 --- a/src/lstm/lstmrecognizer.cpp +++ b/src/lstm/lstmrecognizer.cpp @@ -173,7 +173,7 @@ void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, bool debug, double worst_dict_cert, const TBOX& line_box, PointerVector* words, - int glyph_confidences) { + int lstm_choice_mode) { NetworkIO outputs; float scale_factor; NetworkIO inputs; @@ -185,10 +185,9 @@ void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert, new RecodeBeamSearch(recoder_, null_char_, SimpleTextOutput(), dict_); } search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert, - &GetUnicharset(), glyph_confidences); + &GetUnicharset(), lstm_choice_mode); search_->ExtractBestPathAsWords(line_box, scale_factor, debug, - &GetUnicharset(), words, - glyph_confidences); + &GetUnicharset(), words, lstm_choice_mode); } // Helper computes min and mean best results in the output. diff --git a/src/lstm/lstmrecognizer.h b/src/lstm/lstmrecognizer.h index dcfbc2b5c2..75054b8e8c 100644 --- a/src/lstm/lstmrecognizer.h +++ b/src/lstm/lstmrecognizer.h @@ -184,8 +184,7 @@ class LSTMRecognizer { // will be used in a dictionary word. void RecognizeLine(const ImageData& image_data, bool invert, bool debug, double worst_dict_cert, const TBOX& line_box, - PointerVector* words, - int glyph_confidences = 0); + PointerVector* words, int lstm_choice_mode = 0); // Helper computes min and mean best results in the output. void OutputStats(const NetworkIO& outputs, diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp index 7d76a31f06..a7cb1d4ba3 100644 --- a/src/lstm/recodebeam.cpp +++ b/src/lstm/recodebeam.cpp @@ -81,17 +81,17 @@ RecodeBeamSearch::RecodeBeamSearch(const UnicharCompress& recoder, // Decodes the set of network outputs, storing the lattice internally. void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio, double cert_offset, double worst_dict_cert, - const UNICHARSET* charset, int glyph_confidence) { + const UNICHARSET* charset, int lstm_choice_mode) { beam_size_ = 0; int width = output.Width(); - if (glyph_confidence) + if (lstm_choice_mode) timesteps.clear(); for (int t = 0; t < width; ++t) { ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]); DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert, charset); - if (glyph_confidence) { - SaveMostCertainGlyphs(output.f(t), output.NumFeatures(), charset, t); + if (lstm_choice_mode) { + SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t); } } } @@ -107,33 +107,33 @@ void RecodeBeamSearch::Decode(const GENERIC_2D_ARRAY& output, } } -void RecodeBeamSearch::SaveMostCertainGlyphs(const float* outputs, +void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord) { - std::vector> glyphs; + std::vector> choices; int pos = 0; for (int i = 0; i < num_outputs; ++i) { if (outputs[i] >= 0.01f) { - const char* charakter; + const char* character; if (i + 2 >= num_outputs) { - charakter = ""; + character = ""; } else if (i > 0) { - charakter = charset->id_to_unichar_ext(i + 2); + character = charset->id_to_unichar_ext(i + 2); } else { - charakter = charset->id_to_unichar_ext(i); + character = charset->id_to_unichar_ext(i); } pos = 0; - //order the possible glyphs within one timestep + //order the possible choices within one timestep //beginning with the most likely - while (glyphs.size() > pos && glyphs[pos].second > outputs[i]) { + while (choices.size() > pos && choices[pos].second > outputs[i]) { pos++; } - glyphs.insert(glyphs.begin() + pos, - std::pair(charakter, outputs[i])); + choices.insert(choices.begin() + pos, + std::pair(character, outputs[i])); } } - timesteps.push_back(glyphs); + timesteps.push_back(choices); } // Returns the best path as labels/scores/xcoords similar to simple CTC. @@ -179,7 +179,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, float scale_factor, bool debug, const UNICHARSET* unicharset, PointerVector* words, - int glyph_confidence) { + int lstm_choice_mode) { words->truncate(0); GenericVector unichar_ids; GenericVector certs; @@ -187,7 +187,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, GenericVector xcoords; GenericVector best_nodes; GenericVector second_nodes; - std::deque> best_glyphs; + std::deque> best_choices; ExtractBestPaths(&best_nodes, &second_nodes); if (debug) { DebugPath(unicharset, best_nodes); @@ -199,15 +199,15 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, } int current_char; int timestepEnd = 0; - //if glyph confidence is required in granularity level 2 it stores the x - //Coordinates of every chosen character to match the alternative glyphs to it - if (glyph_confidence == 2) { + //if lstm choice mode is required in granularity level 2 it stores the x + //Coordinates of every chosen character to match the alternative choices to it + if (lstm_choice_mode == 2) { ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, - &xcoords, &best_glyphs); - if (best_glyphs.size() > 0) { - current_char = best_glyphs.front().first; - timestepEnd = best_glyphs.front().second; - best_glyphs.pop_front(); + &xcoords, &best_choices); + if (best_choices.size() > 0) { + current_char = best_choices.front().first; + timestepEnd = best_choices.front().second; + best_choices.pop_front(); } } else { ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings, @@ -243,25 +243,25 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, WERD_RES* word_res = InitializeWord( leading_space, line_box, word_start, word_end, std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor); - if (glyph_confidence == 1) { + if (lstm_choice_mode == 1) { for (size_t i = timestepEnd; i < xcoords[word_end]; i++) { word_res->timesteps.push_back(timesteps[i]); } timestepEnd = xcoords[word_end]; - } else if (glyph_confidence == 2) { + } else if (lstm_choice_mode == 2) { float sum = 0; - std::vector> glyph_pairs; + std::vector> choice_pairs; for (size_t i = timestepEnd; i < xcoords[word_end]; i++) { - for (std::pair glyph : timesteps[i]) { - if (std::strcmp(glyph.first, "") != 0) { - sum += glyph.second; - glyph_pairs.push_back(glyph); + for (std::pair choice : timesteps[i]) { + if (std::strcmp(choice.first, "") != 0) { + sum += choice.second; + choice_pairs.push_back(choice); } } - if (best_glyphs.size() > 0 && i == best_glyphs.front().second-1 + if (best_choices.size() > 0 && i == best_choices.front().second - 1 || i == xcoords[word_end]-1) { std::map summed_propabilities; - for(auto it = glyph_pairs.begin(); it != glyph_pairs.end(); ++it) { + for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) { summed_propabilities[it->first] += it->second; } std::vector> accumulated_timestep; @@ -282,11 +282,11 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box, std::pair(it->first, it->second)); } - if (best_glyphs.size() > 0) { - current_char = best_glyphs.front().first; - best_glyphs.pop_front(); + if (best_choices.size() > 0) { + current_char = best_choices.front().first; + best_choices.pop_front(); } - glyph_pairs.clear(); + choice_pairs.clear(); word_res->timesteps.push_back(accumulated_timestep); sum = 0; } @@ -366,7 +366,7 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds( const GenericVector& best_nodes, GenericVector* unichar_ids, GenericVector* certs, GenericVector* ratings, GenericVector* xcoords, - std::deque>* best_glyphs) { + std::deque>* best_choices) { unichar_ids->truncate(0); certs->truncate(0); ratings->truncate(0); @@ -395,8 +395,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds( } unichar_ids->push_back(unichar_id); xcoords->push_back(t); - if(best_glyphs != nullptr) { - best_glyphs->push_back(std::pair(unichar_id,t)); + if (best_choices != nullptr) { + best_choices->push_back(std::pair(unichar_id, t)); } do { double cert = best_nodes[t++]->certainty; diff --git a/src/lstm/recodebeam.h b/src/lstm/recodebeam.h index 3d5adca743..ef4621028d 100644 --- a/src/lstm/recodebeam.h +++ b/src/lstm/recodebeam.h @@ -186,7 +186,7 @@ class RecodeBeamSearch { // If charset is not null, it enables detailed debugging of the beam search. void Decode(const NetworkIO& output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET* charset, - int glyph_confidence = 0); + int lstm_choice_mode = 0); void Decode(const GENERIC_2D_ARRAY& output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET* charset); @@ -206,7 +206,7 @@ class RecodeBeamSearch { void ExtractBestPathAsWords(const TBOX& line_box, float scale_factor, bool debug, const UNICHARSET* unicharset, PointerVector* words, - int glyph_confidence = 0); + int lstm_choice_mode = 0); // Generates debug output of the content of the beams after a Decode. void DebugBeams(const UNICHARSET& unicharset) const; @@ -282,7 +282,7 @@ class RecodeBeamSearch { const GenericVector& best_nodes, GenericVector* unichar_ids, GenericVector* certs, GenericVector* ratings, GenericVector* xcoords, - std::deque>* best_glyphs = nullptr); + std::deque>* best_choices = nullptr); // Sets up a word with the ratings matrix and fake blobs with boxes in the // right places. @@ -303,8 +303,8 @@ class RecodeBeamSearch { double cert_offset, double worst_dict_cert, const UNICHARSET* charset, bool debug = false); - //Saves the most certain glyphs for the current time-step - void SaveMostCertainGlyphs(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord); + //Saves the most certain choices for the current time-step + void SaveMostCertainChoices(const float* outputs, int num_outputs, const UNICHARSET* charset, int xCoord); // Adds to the appropriate beams the legal (according to recoder) // continuations of context prev, which is from the given index to beams_,