Skip to content

Commit

Permalink
Move LSTM unicharset and recoder to traineddata with version string p…
Browse files Browse the repository at this point in the history
…art1. Backwards compatible - maybe.
  • Loading branch information
theraysmith committed Jul 14, 2017
1 parent 7588540 commit dc8745e
Show file tree
Hide file tree
Showing 14 changed files with 257 additions and 130 deletions.
1 change: 1 addition & 0 deletions api/apitypes.h
Expand Up @@ -21,6 +21,7 @@
#define TESSERACT_API_APITYPES_H_

#include "publictypes.h"
#include "version.h"

// The types used by the API and Page/ResultIterator can be found in:
// ccstruct/publictypes.h
Expand Down
4 changes: 0 additions & 4 deletions api/baseapi.h
Expand Up @@ -20,10 +20,6 @@
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_

#define TESSERACT_VERSION_STR "4.00.00alpha"
#define TESSERACT_VERSION 0x040000
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
(patch))
#include <stdio.h>
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
Expand Down
6 changes: 3 additions & 3 deletions ccmain/tessedit.cpp
Expand Up @@ -188,10 +188,10 @@ bool Tesseract::init_tesseract_lang_data(
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (mgr->GetComponent(TESSDATA_LSTM, &fp)) {
if (mgr->IsComponentAvailable(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
ASSERT_HOST(lstm_recognizer_->DeSerialize(&fp));
if (lstm_use_matrix) lstm_recognizer_->LoadDictionary(language, mgr);
ASSERT_HOST(
lstm_recognizer_->Load(lstm_use_matrix ? language : nullptr, mgr));
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
Expand Down
3 changes: 2 additions & 1 deletion ccutil/Makefile.am
Expand Up @@ -15,7 +15,8 @@ endif
include_HEADERS = \
basedir.h errcode.h fileerr.h genericvector.h helpers.h host.h memry.h \
ndminx.h params.h ocrclass.h platform.h serialis.h strngs.h \
tesscallback.h unichar.h unicharcompress.h unicharmap.h unicharset.h
tesscallback.h unichar.h unicharcompress.h unicharmap.h unicharset.h \
version.h

noinst_HEADERS = \
ambigs.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \
Expand Down
24 changes: 24 additions & 0 deletions ccutil/tessdatamanager.cpp
Expand Up @@ -78,6 +78,9 @@ bool TessdataManager::LoadMemBuffer(const char *name, const char *data,
if (fp.FRead(&entries_[i][0], 1, entry_size) != entry_size) return false;
}
}
if (entries_[TESSDATA_VERSION].empty()) {
SetVersionString("Pre-4.0.0");
}
is_loaded_ = true;
return true;
}
Expand Down Expand Up @@ -139,6 +142,7 @@ void TessdataManager::Clear() {

// Prints a directory of contents.
void TessdataManager::Directory() const {
tprintf("Version string:%s\n", VersionString().c_str());
int offset = TESSDATA_NUM_ENTRIES * sizeof(inT64);
for (int i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
if (!entries_[i].empty()) {
Expand All @@ -153,12 +157,32 @@ void TessdataManager::Directory() const {
// Returns false in case of failure.
bool TessdataManager::GetComponent(TessdataType type, TFile *fp) {
  // Not loaded yet: try to initialize from the stored data file name, and
  // report failure if that does not succeed.
  if (!is_loaded_) {
    if (!Init(data_file_name_.string())) return false;
  }
  // The const overload performs the actual component lookup.
  return static_cast<const TessdataManager *>(this)->GetComponent(type, fp);
}

// Const overload: opens fp on the stored bytes of the requested component.
// Unlike the non-const version it cannot load anything on demand, so the
// manager must already be loaded (asserted). Returns false if the component
// entry is empty.
bool TessdataManager::GetComponent(TessdataType type, TFile *fp) const {
  ASSERT_HOST(is_loaded_);
  const auto &entry = entries_[type];
  if (!entry.empty()) {
    fp->Open(&entry[0], entry.size());
    fp->set_swap(swap_);
    return true;
  }
  return false;
}

// Returns the current version string.
// If the version entry holds no data, an empty string is returned.
string TessdataManager::VersionString() const {
  // Guard the empty case: taking the address of element 0 of an empty entry
  // would index out of range.
  if (entries_[TESSDATA_VERSION].empty()) return string();
  return string(&entries_[TESSDATA_VERSION][0],
                entries_[TESSDATA_VERSION].size());
}

// Sets the version string to the given v_str.
void TessdataManager::SetVersionString(const string &v_str) {
entries_[TESSDATA_VERSION].resize_no_init(v_str.size());
memcpy(&entries_[TESSDATA_VERSION][0], v_str.data(), v_str.size());
}

bool TessdataManager::CombineDataFiles(
const char *language_data_path_prefix,
const char *output_filename) {
Expand Down
30 changes: 28 additions & 2 deletions ccutil/tessdatamanager.h
Expand Up @@ -25,6 +25,7 @@
#include "host.h"
#include "strngs.h"
#include "tprintf.h"
#include "version.h"

static const char kTrainedDataSuffix[] = "traineddata";

Expand All @@ -51,6 +52,9 @@ static const char kLSTMModelFileSuffix[] = "lstm";
static const char kLSTMPuncDawgFileSuffix[] = "lstm-punc-dawg";
static const char kLSTMSystemDawgFileSuffix[] = "lstm-word-dawg";
static const char kLSTMNumberDawgFileSuffix[] = "lstm-number-dawg";
static const char kLSTMUnicharsetFileSuffix[] = "lstm-unicharset";
static const char kLSTMRecoderFileSuffix[] = "lstm-recoder";
static const char kVersionFileSuffix[] = "version";

namespace tesseract {

Expand All @@ -76,6 +80,9 @@ enum TessdataType {
TESSDATA_LSTM_PUNC_DAWG, // 18
TESSDATA_LSTM_SYSTEM_DAWG, // 19
TESSDATA_LSTM_NUMBER_DAWG, // 20
TESSDATA_LSTM_UNICHARSET, // 21
TESSDATA_LSTM_RECODER, // 22
TESSDATA_VERSION, // 23

TESSDATA_NUM_ENTRIES
};
Expand Down Expand Up @@ -106,6 +113,9 @@ static const char *const kTessdataFileSuffixes[] = {
kLSTMPuncDawgFileSuffix, // 18
kLSTMSystemDawgFileSuffix, // 19
kLSTMNumberDawgFileSuffix, // 20
kLSTMUnicharsetFileSuffix, // 21
kLSTMRecoderFileSuffix, // 22
kVersionFileSuffix, // 23
};

/**
Expand All @@ -120,9 +130,13 @@ static const int kMaxNumTessdataEntries = 1000;

class TessdataManager {
public:
TessdataManager() : reader_(nullptr), is_loaded_(false), swap_(false) {}
TessdataManager() : reader_(nullptr), is_loaded_(false), swap_(false) {
SetVersionString(TESSERACT_VERSION_STR);
}
explicit TessdataManager(FileReader reader)
: reader_(reader), is_loaded_(false), swap_(false) {}
: reader_(reader), is_loaded_(false), swap_(false) {
SetVersionString(TESSERACT_VERSION_STR);
}
~TessdataManager() {}

bool swap() const { return swap_; }
Expand Down Expand Up @@ -152,9 +166,21 @@ class TessdataManager {
// Prints a directory of contents.
void Directory() const;

// Reports whether the requested component is present, i.e. whether its
// entry is non-empty.
bool IsComponentAvailable(TessdataType type) const {
  const bool is_missing = entries_[type].empty();
  return !is_missing;
}
// Opens the given TFile pointer to the given component type.
// Returns false in case of failure.
bool GetComponent(TessdataType type, TFile *fp);
// As non-const version except it can't load the component if not already
// loaded.
bool GetComponent(TessdataType type, TFile *fp) const;

// Returns the current version string.
string VersionString() const;
// Sets the version string to the given v_str.
void SetVersionString(const string &v_str);

// Returns true if the base Tesseract components are present.
bool IsBaseAvailable() const {
Expand Down
9 changes: 9 additions & 0 deletions ccutil/version.h
@@ -0,0 +1,9 @@
// Version identification macros.
#ifndef TESSERACT_CCUTIL_VERSION_H_
#define TESSERACT_CCUTIL_VERSION_H_

// Human-readable version string.
#define TESSERACT_VERSION_STR "4.00.00alpha"
// Numeric version in the layout produced by MAKE_VERSION:
// 0x040000 == MAKE_VERSION(4, 0, 0).
#define TESSERACT_VERSION 0x040000
// Packs a version triple into one integer: major is shifted left by 16 bits,
// minor by 8 bits, and patch occupies the low bits. No masking is applied,
// so minor and patch must each fit in 8 bits to avoid overlapping.
#define MAKE_VERSION(major, minor, patch) \
(((major) << 16) | ((minor) << 8) | (patch))

#endif // TESSERACT_CCUTIL_VERSION_H_
49 changes: 42 additions & 7 deletions lstm/lstmrecognizer.cpp
Expand Up @@ -68,10 +68,24 @@ LSTMRecognizer::~LSTMRecognizer() {
delete search_;
}

// Loads the LSTM model stored in mgr. When lang is non-null the dictionary
// is loaded as well; the result of the dictionary load is deliberately
// ignored so the recognizer can still run without one.
// Returns false if the LSTM component is missing or fails to deserialize.
bool LSTMRecognizer::Load(const char* lang, TessdataManager* mgr) {
  TFile fp;
  const bool model_loaded =
      mgr->GetComponent(TESSDATA_LSTM, &fp) && DeSerialize(mgr, &fp);
  if (!model_loaded) return false;
  if (lang != nullptr) {
    // Best effort only: a missing dictionary is not an error.
    LoadDictionary(lang, mgr);
  }
  return true;
}

// Writes to the given file. Returns false in case of error.
bool LSTMRecognizer::Serialize(TFile* fp) const {
bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const {
bool include_charsets = mgr == nullptr ||
!mgr->IsComponentAvailable(TESSDATA_LSTM_RECODER) ||
!mgr->IsComponentAvailable(TESSDATA_LSTM_UNICHARSET);
if (!network_->Serialize(fp)) return false;
if (!GetUnicharset().save_to_file(fp)) return false;
if (include_charsets && !GetUnicharset().save_to_file(fp)) return false;
if (!network_str_.Serialize(fp)) return false;
if (fp->FWrite(&training_flags_, sizeof(training_flags_), 1) != 1)
return false;
Expand All @@ -83,16 +97,20 @@ bool LSTMRecognizer::Serialize(TFile* fp) const {
if (fp->FWrite(&weight_range_, sizeof(weight_range_), 1) != 1) return false;
if (fp->FWrite(&learning_rate_, sizeof(learning_rate_), 1) != 1) return false;
if (fp->FWrite(&momentum_, sizeof(momentum_), 1) != 1) return false;
if (IsRecoding() && !recoder_.Serialize(fp)) return false;
if (include_charsets && IsRecoding() && !recoder_.Serialize(fp)) return false;
return true;
}

// Reads from the given file. Returns false in case of error.
bool LSTMRecognizer::DeSerialize(TFile* fp) {
bool LSTMRecognizer::DeSerialize(const TessdataManager* mgr, TFile* fp) {
delete network_;
network_ = Network::CreateFromFile(fp);
if (network_ == NULL) return false;
if (!ccutil_.unicharset.load_from_file(fp, false)) return false;
bool include_charsets = mgr == nullptr ||
!mgr->IsComponentAvailable(TESSDATA_LSTM_RECODER) ||
!mgr->IsComponentAvailable(TESSDATA_LSTM_UNICHARSET);
if (include_charsets && !ccutil_.unicharset.load_from_file(fp, false))
return false;
if (!network_str_.DeSerialize(fp)) return false;
if (fp->FReadEndian(&training_flags_, sizeof(training_flags_), 1) != 1)
return false;
Expand All @@ -107,6 +125,25 @@ bool LSTMRecognizer::DeSerialize(TFile* fp) {
if (fp->FReadEndian(&learning_rate_, sizeof(learning_rate_), 1) != 1)
return false;
if (fp->FReadEndian(&momentum_, sizeof(momentum_), 1) != 1) return false;
if (include_charsets && !LoadRecoder(fp)) return false;
if (!include_charsets && !LoadCharsets(mgr)) return false;
network_->SetRandomizer(&randomizer_);
network_->CacheXScaleFactor(network_->XScaleFactor());
return true;
}

// Loads the unicharset and then the recoder from their components in mgr.
// Returns false as soon as either component is missing or fails to load.
bool LSTMRecognizer::LoadCharsets(const TessdataManager* mgr) {
  TFile fp;
  // The unicharset is loaded first; the same TFile is then reopened on the
  // recoder component.
  const bool unicharset_ok =
      mgr->GetComponent(TESSDATA_LSTM_UNICHARSET, &fp) &&
      ccutil_.unicharset.load_from_file(&fp, false);
  if (!unicharset_ok) return false;
  return mgr->GetComponent(TESSDATA_LSTM_RECODER, &fp) && LoadRecoder(&fp);
}

// Loads the Recoder.
bool LSTMRecognizer::LoadRecoder(TFile* fp) {
if (IsRecoding()) {
if (!recoder_.DeSerialize(fp)) return false;
RecodedCharID code;
Expand All @@ -119,8 +156,6 @@ bool LSTMRecognizer::DeSerialize(TFile* fp) {
recoder_.SetupPassThrough(GetUnicharset());
training_flags_ |= TF_COMPRESS_UNICHARSET;
}
network_->SetRandomizer(&randomizer_);
network_->CacheXScaleFactor(network_->XScaleFactor());
return true;
}

Expand Down
14 changes: 12 additions & 2 deletions lstm/lstmrecognizer.h
Expand Up @@ -155,10 +155,20 @@ class LSTMRecognizer {
}
int null_char() const { return null_char_; }

// Loads a model from mgr, including the dictionary only if lang is not null.
bool Load(const char* lang, TessdataManager* mgr);

// Writes to the given file. Returns false in case of error.
bool Serialize(TFile* fp) const;
// If mgr contains a unicharset and recoder, then they are not encoded to fp.
bool Serialize(const TessdataManager* mgr, TFile* fp) const;
// Reads from the given file. Returns false in case of error.
bool DeSerialize(TFile* fp);
// If mgr contains a unicharset and recoder, then they are taken from there,
// otherwise, they are part of the serialization in fp.
bool DeSerialize(const TessdataManager* mgr, TFile* fp);
// Loads the charsets from mgr.
bool LoadCharsets(const TessdataManager* mgr);
// Loads the Recoder.
bool LoadRecoder(TFile* fp);
// Loads the dictionary if possible from the traineddata file.
// Prints a warning message, and returns false but otherwise fails silently
// and continues to work without it if loading fails.
Expand Down

0 comments on commit dc8745e

Please sign in to comment.