Skip to content

Commit

Permalink
Move LSTMTrainer from libtesseract to libtesseract_training
Browse files (browse the repository at this point in the history)
LSTMTrainer is only used for training, so moving it from libtesseract to
libtesseract_training makes the Tesseract shared library smaller.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Jun 22, 2019
1 parent 3159f42 commit df98bb7
Show file tree
Hide file tree
Showing 9 changed files with 8 additions and 10 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
Expand Up @@ -510,7 +510,6 @@ install(FILES
src/lstm/input.h
src/lstm/lstm.h
src/lstm/lstmrecognizer.h
-src/lstm/lstmtrainer.h
src/lstm/maxpool.h
src/lstm/networkbuilder.h
src/lstm/network.h
Expand Down
4 changes: 2 additions & 2 deletions src/lstm/Makefile.am
Expand Up @@ -28,7 +28,7 @@ pkginclude_HEADERS =

noinst_HEADERS = convolve.h ctc.h
noinst_HEADERS += fullyconnected.h functions.h input.h
-noinst_HEADERS += lstm.h lstmrecognizer.h lstmtrainer.h maxpool.h
+noinst_HEADERS += lstm.h lstmrecognizer.h maxpool.h
noinst_HEADERS += network.h networkbuilder.h networkio.h networkscratch.h
noinst_HEADERS += parallel.h plumbing.h recodebeam.h reconfig.h reversed.h
noinst_HEADERS += series.h static_shape.h stridemap.h
Expand All @@ -38,7 +38,7 @@ noinst_LTLIBRARIES = libtesseract_lstm.la

libtesseract_lstm_la_SOURCES = \
convolve.cpp ctc.cpp fullyconnected.cpp functions.cpp input.cpp \
-lstm.cpp lstmrecognizer.cpp lstmtrainer.cpp maxpool.cpp \
+lstm.cpp lstmrecognizer.cpp maxpool.cpp \
networkbuilder.cpp network.cpp networkio.cpp \
parallel.cpp plumbing.cpp recodebeam.cpp reconfig.cpp reversed.cpp \
series.cpp stridemap.cpp tfnetwork.cpp weightmatrix.cpp
Expand Down
6 changes: 0 additions & 6 deletions src/lstm/recodebeam.cpp
Expand Up @@ -31,12 +31,6 @@

namespace tesseract {

-// Clipping value for certainty inside Tesseract. Reflects the minimum value
-// of certainty that will be returned by ExtractBestPathAsUnicharIds.
-// Supposedly on a uniform scale that can be compared across languages and
-// engines.
-const float RecodeBeamSearch::kMinCertainty = -20.0f;

// The beam width at each code position.
const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
Expand Down
2 changes: 1 addition & 1 deletion src/lstm/recodebeam.h
Expand Up @@ -219,7 +219,7 @@ class RecodeBeamSearch {
// of certainty that will be returned by ExtractBestPathAsUnicharIds.
// Supposedly on a uniform scale that can be compared across languages and
// engines.
-static const float kMinCertainty;
+static constexpr float kMinCertainty = -20.0f;
// Number of different code lengths for which we have a separate beam.
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1;
// Total number of beams: dawg/nodawg * number of NodeContinuation * number
Expand Down
2 changes: 2 additions & 0 deletions src/training/CMakeLists.txt
Expand Up @@ -179,6 +179,8 @@ set(unicharset_training_src
lang_model_helpers.h
lstmtester.cpp
lstmtester.h
+lstmtrainer.cpp
+lstmtrainer.h
normstrngs.cpp
normstrngs.h
unicharset_training_utils.cpp
Expand Down
2 changes: 2 additions & 0 deletions src/training/Makefile.am
Expand Up @@ -37,6 +37,7 @@ noinst_HEADERS = \
lang_model_helpers.h \
ligature_table.h \
lstmtester.h \
+lstmtrainer.h \
normstrngs.h \
pango_font_info.h \
stringrenderer.h \
Expand Down Expand Up @@ -71,6 +72,7 @@ libtesseract_training_la_SOURCES = \
lang_model_helpers.cpp \
ligature_table.cpp \
lstmtester.cpp \
+lstmtrainer.cpp \
normstrngs.cpp \
pango_font_info.cpp \
stringrenderer.cpp \
Expand Down
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions sw.cpp
Expand Up @@ -112,6 +112,7 @@ void build(Solution &s)
"src/training/icuerrorcode.h",
"src/training/lang_model_helpers.*"_rr,
"src/training/lstmtester.*"_rr,
+"src/training/lstmtrainer.*"_rr,
"src/training/normstrngs.*"_rr,
"src/training/unicharset_training_utils.*"_rr,
"src/training/validat.*"_rr;
Expand Down

0 comments on commit df98bb7

Please sign in to comment.