From fc6a390c6c08eb6e661bf44af06dec2aeeb59b5f Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Fri, 8 Sep 2017 15:06:19 +0100 Subject: [PATCH] Added intsimdmatrix as a generic integer matrixdotvector function with AVX2 and SSE specializations --- api/Makefile.am | 2 + arch/Makefile.am | 18 ++- arch/intsimdmatrix.cpp | 133 ++++++++++++++++ arch/intsimdmatrix.h | 135 ++++++++++++++++ arch/intsimdmatrixavx2.cpp | 275 +++++++++++++++++++++++++++++++++ arch/intsimdmatrixavx2.h | 33 ++++ arch/intsimdmatrixsse.cpp | 44 ++++++ arch/intsimdmatrixsse.h | 33 ++++ ccstruct/matrix.h | 10 +- configure.ac | 6 + lstm/lstm.cpp | 9 +- lstm/networkio.cpp | 22 ++- lstm/networkio.h | 8 + lstm/weightmatrix.cpp | 25 +-- lstm/weightmatrix.h | 15 +- training/combine_lang_model | 228 +++++++++++++++++++++++++++ training/lstmeval | 228 +++++++++++++++++++++++++++ training/lstmtraining | 228 +++++++++++++++++++++++++++ unittest/Makefile.am | 16 +- unittest/include_gunit.h | 17 ++ unittest/intsimdmatrix_test.cc | 105 +++++++++++++ 21 files changed, 1549 insertions(+), 41 deletions(-) create mode 100644 arch/intsimdmatrix.cpp create mode 100644 arch/intsimdmatrix.h create mode 100644 arch/intsimdmatrixavx2.cpp create mode 100644 arch/intsimdmatrixavx2.h create mode 100644 arch/intsimdmatrixsse.cpp create mode 100644 arch/intsimdmatrixsse.h create mode 100755 training/combine_lang_model create mode 100755 training/lstmeval create mode 100755 training/lstmtraining create mode 100644 unittest/include_gunit.h create mode 100644 unittest/intsimdmatrix_test.cc diff --git a/api/Makefile.am b/api/Makefile.am index 7209c45cd3..4fa7488cd3 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -30,6 +30,7 @@ libtesseract_api_la_LIBADD = \ ../dict/libtesseract_dict.la \ ../arch/libtesseract_arch.la \ ../arch/libtesseract_avx.la \ + ../arch/libtesseract_avx2.la \ ../arch/libtesseract_sse.la \ ../lstm/libtesseract_lstm.la \ ../ccstruct/libtesseract_ccstruct.la \ @@ -60,6 +61,7 @@ libtesseract_la_LIBADD = \ 
../dict/libtesseract_dict.la \ ../arch/libtesseract_arch.la \ ../arch/libtesseract_avx.la \ + ../arch/libtesseract_avx2.la \ ../arch/libtesseract_sse.la \ ../lstm/libtesseract_lstm.la \ ../ccstruct/libtesseract_ccstruct.la \ diff --git a/arch/Makefile.am b/arch/Makefile.am index 21515a7b97..85d9f21da2 100644 --- a/arch/Makefile.am +++ b/arch/Makefile.am @@ -1,4 +1,4 @@ -AM_CPPFLAGS += -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer -DUSE_STD_NAMESPACE +AM_CPPFLAGS += -I$(top_srcdir)/ccstruct -I$(top_srcdir)/ccutil -I$(top_srcdir)/viewer -DUSE_STD_NAMESPACE AUTOMAKE_OPTIONS = subdir-objects SUBDIRS = AM_CXXFLAGS = @@ -8,31 +8,37 @@ AM_CXXFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden AM_CPPFLAGS += -DTESS_EXPORTS endif -include_HEADERS = dotproductavx.h dotproductsse.h simddetect.h +include_HEADERS = dotproductavx.h dotproductsse.h intsimdmatrix.h intsimdmatrixavx2.h intsimdmatrixsse.h simddetect.h noinst_HEADERS = if !USING_MULTIPLELIBS -noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la +noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la noinst_LTLIBRARIES += libtesseract_arch.la else -lib_LTLIBRARIES = libtesseract_avx.la libtesseract_sse.la +lib_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la lib_LTLIBRARIES += libtesseract_arch.la libtesseract_arch_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) libtesseract_avx_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) +libtesseract_avx2_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) libtesseract_sse_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) endif if AVX_OPT libtesseract_avx_la_CXXFLAGS = -mavx endif +if AVX2_OPT +libtesseract_avx2_la_CXXFLAGS = -mavx2 +endif if SSE41_OPT libtesseract_sse_la_CXXFLAGS = -msse4.1 endif -libtesseract_arch_la_SOURCES = simddetect.cpp +libtesseract_arch_la_SOURCES = intsimdmatrix.cpp simddetect.cpp libtesseract_avx_la_SOURCES = dotproductavx.cpp -libtesseract_sse_la_SOURCES = 
dotproductsse.cpp +libtesseract_avx2_la_SOURCES = intsimdmatrixavx2.cpp + +libtesseract_sse_la_SOURCES = dotproductsse.cpp intsimdmatrixsse.cpp diff --git a/arch/intsimdmatrix.cpp b/arch/intsimdmatrix.cpp new file mode 100644 index 0000000000..59a86d2e1f --- /dev/null +++ b/arch/intsimdmatrix.cpp @@ -0,0 +1,133 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrix.cpp +// Description: Base class for 8-bit int SIMD matrix multipliers. +// Author: Ray Smith +// Created: Tue Aug 15 08:01:32 PST 2017 +// +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#include "intsimdmatrix.h" +#include "intsimdmatrixavx2.h" +#include "intsimdmatrixsse.h" +#include "simddetect.h" + +namespace tesseract { + +// Factory makes and returns an IntSimdMatrix (sub)class of the best +// available type for the current architecture. +/* static */ +IntSimdMatrix* IntSimdMatrix::GetFastestMultiplier() { + IntSimdMatrix* multiplier = nullptr; + if (SIMDDetect::IsAVX2Available()) { + multiplier = new IntSimdMatrixAVX2(); + } else if (SIMDDetect::IsSSEAvailable()) { + multiplier = new IntSimdMatrixSSE(); + } else { + // Default c++ implementation. + multiplier = new IntSimdMatrix(); + } + return multiplier; +} + +// Computes a reshaped copy of the weight matrix w. If there are no +// partial_funcs_, it does nothing. 
+void IntSimdMatrix::Init(const GENERIC_2D_ARRAY& w) { + if (partial_funcs_.empty()) return; + int num_out = w.dim1(); + int num_in = w.dim2() - 1; + // The rounded-up sizes of the reshaped weight matrix, excluding biases. + int rounded_num_in = Roundup(num_in, num_inputs_per_group_); + int rounded_num_out = RoundOutputs(num_out); + // Add the bias and compute the required size. + shaped_w_.resize((rounded_num_in + 1) * rounded_num_out, 0); + int shaped_index = 0; + int output = 0; + // Each number of registers needs a different format! Iterates over the + // different numbers of registers (each a power of 2). + for (int num_registers = max_output_registers_; num_registers >= 1; + num_registers /= 2) { + // The number of outputs that we will generate with this many registers. + int num_outputs_per_register_set = + num_registers * num_outputs_per_register_; + // Use the max number of registers until we have to go fewer. + while (output + num_outputs_per_register_set <= rounded_num_out) { + // Accumulating outputs in registers saves iterating over the inputs, so + // we only have to do it once per output register set. + for (int input = 0; input < num_in; input += num_inputs_per_group_) { + // Iterate over the number of outputs in a register set. + for (int j = 0; j < num_outputs_per_register_set; ++j) { + // Inner-most loop corresponds to the number of inputs in an input + // group. + for (int i = 0; i < num_inputs_per_group_; ++i) { + int8_t weight = 0; + if (output + j < num_out && input + i < num_in) + weight = w(output + j, input + i); + shaped_w_[shaped_index++] = weight; + } + } + } + // Append the bias weights for the register set. + for (int j = 0; j < num_outputs_per_register_set; ++j) { + int8_t weight = 0; + if (output + j < num_out) weight = w(output + j, num_in); + shaped_w_[shaped_index++] = weight; + } + output += num_outputs_per_register_set; + } + } +} + +// Computes matrix.vector v = Wu. 
+// u is of size W.dim2() - 1 and the output v is of size W.dim1(). +// u is imagined to have an extra element at the end with value 1, to +// implement the bias, but it doesn't actually have it. +void IntSimdMatrix::MatrixDotVector(const GENERIC_2D_ARRAY& w, + const GenericVector& scales, + const int8_t* u, double* v) const { + int num_out = w.dim1(); + int num_in = w.dim2() - 1; + if (partial_funcs_.empty()) { + // Base implementation. + for (int i = 0; i < num_out; ++i) { + const int8_t* wi = w[i]; + int total = 0; + for (int j = 0; j < num_in; ++j) total += wi[j] * u[j]; + // Add in the bias and correct for integer values. + v[i] = (static_cast(total) / MAX_INT8 + wi[num_in]) * scales[i]; + } + } else { + const int8_t* w_data = shaped_w_.data(); + const double* scales_data = &scales[0]; + // Each call to a partial_func_ produces group_size outputs, except the + // last one, which can produce less. + int group_size = num_outputs_per_register_ * max_output_registers_; + int rounded_num_in = Roundup(num_in, num_inputs_per_group_); + int rounded_num_out = RoundOutputs(num_out); + int output = 0; + for (auto fn : partial_funcs_) { + // The amount of w_data consumed by each call to fn. + int w_step = (rounded_num_in + 1) * group_size; + // Run with this group size, until it would produce too much output, then + // switch to a smaller size. + for (; output + group_size <= rounded_num_out; output += group_size) { + (*fn)(w_data, scales_data, u, rounded_num_in, num_out - output, v); + w_data += w_step; + scales_data += group_size; + v += group_size; + } + group_size /= 2; + } + } +} + +} // namespace tesseract diff --git a/arch/intsimdmatrix.h b/arch/intsimdmatrix.h new file mode 100644 index 0000000000..aa1a9a0ce2 --- /dev/null +++ b/arch/intsimdmatrix.h @@ -0,0 +1,135 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrix.h +// Description: Base class for 8-bit int SIMD matrix multipliers. 
+// Author: Ray Smith +// Created: Tue Aug 15 07:37:20 PST 2017 +// +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_ARCH_INTSIMDMATRIX_H_ +#define TESSERACT_ARCH_INTSIMDMATRIX_H_ + +#include +#include +#include "genericvector.h" +#include "matrix.h" + +namespace tesseract { + +// Base class for a SIMD function to multiply a matrix by a vector, with sources +// of 8-bit signed integer, and result in a double, after appropriate scaling. +// Assumes a specific method of multiplication that can be applied to any size +// and number of SIMD registers as follows: +// int32_t results are computed with num_outputs_per_register_ in each of +// max_output_registers_ result registers, repeatedly until it would make too +// many results, then the number of registers is halved, and so-on down to a +// single result register. The last calculation only outputs the required number +// of results instead of writing beyond the bounds. Eg: matrix has 75 outputs, +// num_outputs_per_register_ = 4, and max_output_registers_ = 8, +// Step 1: 8x4=32 results are computed, +// Step 2: 8x4=32 again, total 64, +// Step 3: 2x4=8 (since 8x4 is too many, so is 4x4), total 72, +// Step 4: 1x3, total 75. +// Each step above is computed using a PartialFunc, which runs over the input +// vector once. 
The input is read one registerful of num_inputs_per_register_ +// at a time (presumably 4x num_outputs_per_register_ since they are int8_t) +// so the inputs MUST BE PADDED to a multiple of num_inputs_per_register_. +// Since it is slow (on Intel at least) to horizontally add in a register, +// provision is made to process num_inputs_per_group_ inputs at a time, with +// the group being replicated num_input_groups_ times and multiplied by a +// num_inputs_per_group_ by num_input_groups_ rectangle of the weights matrix. +// This is most convenient if num_inputs_per_group_ is 4, and the product +// sign-extends and sums 8x8=16 bit results to 32 bits, adding 4 adjacent +// results in the process, but it doesn't have to be implemented that way. +// The weights are re-ordered by Init() to be used sequentially by the above +// algorithm, followed by the biases, so they can be added at the end. +// The base class computes the base C++ implementation. +// NOTE that, although the subclasses execute on different SIMD hardware, no +// virtual methods are needed, as the constructor sets up everything that +// is required to allow the base class implementation to do all the work. +class IntSimdMatrix { + public: + // Constructor should set the data members to indicate the sizes. + // NOTE: Base constructor public only for test purposes. + IntSimdMatrix() + : num_outputs_per_register_(1), + max_output_registers_(1), + num_inputs_per_register_(1), + num_inputs_per_group_(1), + num_input_groups_(1) {} + + // Factory makes and returns an IntSimdMatrix (sub)class of the best + // available type for the current architecture. + static IntSimdMatrix* GetFastestMultiplier(); + + // Computes a reshaped copy of the weight matrix w. If there are no + // partial_funcs_, it does nothing. + void Init(const GENERIC_2D_ARRAY& w); + + // Rounds the size up to a multiple of the input register size (in int8_t). 
+ int RoundInputs(int size) const { + return Roundup(size, num_inputs_per_register_); + } + // Rounds the size up to a multiple of the output register size (in int32_t). + int RoundOutputs(int size) const { + return Roundup(size, num_outputs_per_register_); + } + + // Computes matrix.vector v = Wu. + // u is of size W.dim2() - 1 and the output v is of size W.dim1(). + // u is imagined to have an extra element at the end with value 1, to + // implement the bias, but it doesn't actually have it. + // Computes the base C++ implementation, if there are no partial_funcs_. + // NOTE: The size of the input vector (u) must be padded using + // RoundInputs above. + // The input will be over-read to the extent of the padding. There are no + // alignment requirements. + void MatrixDotVector(const GENERIC_2D_ARRAY& w, + const GenericVector& scales, const int8_t* u, + double* v) const; + + protected: + // Function to compute part of a matrix.vector multiplication. The weights + // are in a very specific order (see above) in w, which is multiplied by + // u of length num_in, to produce output v after scaling the integer results + // by the corresponding member of scales. + // The amount of w and scales consumed is fixed and not available to the + // caller. The number of outputs written to v will be at most num_out. + typedef void (*PartialFunc)(const int8_t* w, const double* scales, + const int8_t* u, int num_in, int num_out, + double* v); + + // Rounds the input up to a multiple of the given factor. + static int Roundup(int input, int factor) { + return (input + factor - 1) / factor * factor; + } + + // Number of 32 bit outputs held in each register. + int num_outputs_per_register_; + // Maximum number of registers that we will use to hold outputs. + int max_output_registers_; + // Number of 8 bit inputs in the inputs register. + int num_inputs_per_register_; + // Number of inputs in each weight group. 
+ int num_inputs_per_group_; + // Number of groups of inputs to be broadcast. + int num_input_groups_; + // The weights matrix reorganized in whatever way suits this instance. + std::vector shaped_w_; + // A series of functions to compute a partial result. + std::vector partial_funcs_; +}; + +} // namespace tesseract + +#endif // TESSERACT_ARCH_INTSIMDMATRIX_H_ diff --git a/arch/intsimdmatrixavx2.cpp b/arch/intsimdmatrixavx2.cpp new file mode 100644 index 0000000000..2d6cbb6783 --- /dev/null +++ b/arch/intsimdmatrixavx2.cpp @@ -0,0 +1,275 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrixavx2.cpp +// Description: matrix-vector product for 8-bit data on avx2. +// Author: Ray Smith +// Created: Fri Aug 04 13:26:20 PST 2017 +// +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#include "intsimdmatrixavx2.h" + +#ifdef __AVX2__ +#include +#include +#include + +namespace tesseract { + +// Number of outputs held in each register. 8 x 32 bit ints. +constexpr int kNumOutputsPerRegister = 8; +// Maximum number of registers that we will use. +constexpr int kMaxOutputRegisters = 8; +// Number of inputs in the inputs register. +constexpr int kNumInputsPerRegister = 32; +// Number of inputs in each weight group. +constexpr int kNumInputsPerGroup = 4; +// Number of groups of inputs to be broadcast. 
+constexpr int kNumInputGroups = kNumInputsPerRegister / kNumInputsPerGroup; + +// Computes one set of 4x8 products of inputs and weights, adding to result. +// Horizontally adds 4 adjacent results, making 8x32-bit results. +// rep_input is assumed to be an 8x replicated set of 4x8-bit signed integers. +// Note that wi must previously have been re-organized with blocks of 4x8 +// weights in contiguous memory. +// ones is a register of 16x16-bit values all equal to 1. +// Note: wi is incremented by the amount of data read. +// weights and reps are scratch registers. +// This function must be inlined with references in order for the compiler to +// correctly use the registers declared in the caller. +inline void MultiplyGroup(const __m256i& rep_input, const __m256i& ones, + const int8_t*& wi, __m256i& weights, __m256i& reps, + __m256i& result) { + // Load a 4x8 block of weights. + weights = _mm256_loadu_si256(reinterpret_cast(wi)); + wi += kNumInputsPerRegister; + // Normalize the signs on rep_input, weights, so weights is always +ve. + reps = _mm256_sign_epi8(rep_input, weights); + weights = _mm256_sign_epi8(weights, weights); + // Multiply 32x8-bit reps by 32x8-bit weights to make 16x16-bit results, + // with adjacent pairs added. + weights = _mm256_maddubs_epi16(weights, reps); + // Multiply 16x16-bit result by 16x16-bit ones to make 8x32-bit results, + // with adjacent pairs added. What we really want is a horizontal add of + // 16+16=32 bit result, but there is no such instruction, so multiply by + // 16-bit ones instead. It is probably faster than all the sign-extending, + // permuting and adding that would otherwise be required. + weights = _mm256_madd_epi16(weights, ones); + result = _mm256_add_epi32(result, weights); +} + +// Extracts and converts 8x32-bit results from result, adding the bias from wi +// and scaling by scales, before storing in *v. Note that wi, scales and v are +// expected to contain 8 consecutive elements or num_out if less. 
+inline void ExtractResults(__m256i& result, __m256i& shift_id, + const int8_t*& wi, const double*& scales, + int num_out, double*& v) { + for (int out = 0; out < num_out; ++out) { + int32_t res = _mm256_extract_epi32(result, 0); + *v++ = (static_cast(res) / MAX_INT8 + *wi++) * *scales++; + // Rotate the results in int32_t units, so the next result is ready. + result = _mm256_permutevar8x32_epi32(result, shift_id); + } +} + +// Computes part of matrix.vector v = Wu. Computes N=64 results. +// The weights *must* be arranged so that consecutive reads from wi +// provides (num_in/kNumInputsPerGroup groups of (N output dim groups of +// (kNumInputsPerGroup inputs))). After that there must be N consecutive +// bias weights, before continuing with any more weights. +// u must be padded out with zeros to +// kNumInputsPerGroup*ceil(num_in/kNumInputsPerGroup) elements. +static void PartialMatrixDotVector64(const int8_t* wi, const double* scales, + const int8_t* u, int num_in, int num_out, + double* v) { + // Register containing 16-bit ones for horizontal add with 16->32 bit + // conversion. + __m256i ones = + _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); + // Initialize all the results to 0. + __m256i result0 = _mm256_setzero_si256(); + __m256i result1 = _mm256_setzero_si256(); + __m256i result2 = _mm256_setzero_si256(); + __m256i result3 = _mm256_setzero_si256(); + __m256i result4 = _mm256_setzero_si256(); + __m256i result5 = _mm256_setzero_si256(); + __m256i result6 = _mm256_setzero_si256(); + __m256i result7 = _mm256_setzero_si256(); + // Iterate over the input (u), one registerful at a time. + for (int j = 0; j < num_in;) { + __m256i inputs = + _mm256_loadu_si256(reinterpret_cast(u + j)); + // Inputs are processed in groups of kNumInputsPerGroup, replicated + // kNumInputGroups times. 
+ for (int ig = 0; ig < kNumInputGroups && j < num_in; + ++ig, j += kNumInputsPerGroup) { + // Replicate the low 32 bits (4 inputs) 8 times. + __m256i rep_input = + _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); + // Rotate the inputs in groups of 4, so the next 4 inputs are ready. + inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); + __m256i weights, reps; + // Mul-add, with horizontal add of the 4 inputs to each of the results. + MultiplyGroup(rep_input, ones, wi, weights, reps, result0); + MultiplyGroup(rep_input, ones, wi, weights, reps, result1); + MultiplyGroup(rep_input, ones, wi, weights, reps, result2); + MultiplyGroup(rep_input, ones, wi, weights, reps, result3); + MultiplyGroup(rep_input, ones, wi, weights, reps, result4); + MultiplyGroup(rep_input, ones, wi, weights, reps, result5); + MultiplyGroup(rep_input, ones, wi, weights, reps, result6); + MultiplyGroup(rep_input, ones, wi, weights, reps, result7); + } + } + ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result3, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result4, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result5, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result6, shift_id, wi, scales, kNumOutputsPerRegister, v); + num_out -= kNumOutputsPerRegister * 7; + ExtractResults(result7, shift_id, wi, scales, + std::min(kNumOutputsPerRegister, num_out), v); +} + +// Computes part of matrix.vector v = Wu. Computes N=32 results. +// For details see PartialMatrixDotVector64 with N=32. +static void PartialMatrixDotVector32(const int8_t* wi, const double* scales, + const int8_t* u, int num_in, int num_out, + double* v) { + // Register containing 16-bit ones for horizontal add with 16->32 bit + // conversion. 
+ __m256i ones = + _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); + // Initialize all the results to 0. + __m256i result0 = _mm256_setzero_si256(); + __m256i result1 = _mm256_setzero_si256(); + __m256i result2 = _mm256_setzero_si256(); + __m256i result3 = _mm256_setzero_si256(); + // Iterate over the input (u), one registerful at a time. + for (int j = 0; j < num_in;) { + __m256i inputs = + _mm256_loadu_si256(reinterpret_cast(u + j)); + // Inputs are processed in groups of kNumInputsPerGroup, replicated + // kNumInputGroups times. + for (int ig = 0; ig < kNumInputGroups && j < num_in; + ++ig, j += kNumInputsPerGroup) { + // Replicate the low 32 bits (4 inputs) 8 times. + __m256i rep_input = + _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); + // Rotate the inputs in groups of 4, so the next 4 inputs are ready. + inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); + __m256i weights, reps; + // Mul-add, with horizontal add of the 4 inputs to each of the results. + MultiplyGroup(rep_input, ones, wi, weights, reps, result0); + MultiplyGroup(rep_input, ones, wi, weights, reps, result1); + MultiplyGroup(rep_input, ones, wi, weights, reps, result2); + MultiplyGroup(rep_input, ones, wi, weights, reps, result3); + } + } + ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result1, shift_id, wi, scales, kNumOutputsPerRegister, v); + ExtractResults(result2, shift_id, wi, scales, kNumOutputsPerRegister, v); + num_out -= kNumOutputsPerRegister * 3; + ExtractResults(result3, shift_id, wi, scales, + std::min(kNumOutputsPerRegister, num_out), v); +} + +// Computes part of matrix.vector v = Wu. Computes N=16 results. +// For details see PartialMatrixDotVector64 with N=16. 
+static void PartialMatrixDotVector16(const int8_t* wi, const double* scales, + const int8_t* u, int num_in, int num_out, + double* v) { + // Register containing 16-bit ones for horizontal add with 16->32 bit + // conversion. + __m256i ones = + _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); + // Initialize all the results to 0. + __m256i result0 = _mm256_setzero_si256(); + __m256i result1 = _mm256_setzero_si256(); + // Iterate over the input (u), one registerful at a time. + for (int j = 0; j < num_in;) { + __m256i inputs = + _mm256_loadu_si256(reinterpret_cast(u + j)); + // Inputs are processed in groups of kNumInputsPerGroup, replicated + // kNumInputGroups times. + for (int ig = 0; ig < kNumInputGroups && j < num_in; + ++ig, j += kNumInputsPerGroup) { + // Replicate the low 32 bits (4 inputs) 8 times. + __m256i rep_input = + _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); + // Rotate the inputs in groups of 4, so the next 4 inputs are ready. + inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); + __m256i weights, reps; + // Mul-add, with horizontal add of the 4 inputs to each of the results. + MultiplyGroup(rep_input, ones, wi, weights, reps, result0); + MultiplyGroup(rep_input, ones, wi, weights, reps, result1); + } + } + ExtractResults(result0, shift_id, wi, scales, kNumOutputsPerRegister, v); + num_out -= kNumOutputsPerRegister; + ExtractResults(result1, shift_id, wi, scales, + std::min(kNumOutputsPerRegister, num_out), v); +} + +// Computes part of matrix.vector v = Wu. Computes N=8 results. +// For details see PartialMatrixDotVector64 with N=8. +static void PartialMatrixDotVector8(const int8_t* wi, const double* scales, + const int8_t* u, int num_in, int num_out, + double* v) { + // Register containing 16-bit ones for horizontal add with 16->32 bit + // conversion. 
+ __m256i ones = + _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + __m256i shift_id = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); + // Initialize all the results to 0. + __m256i result0 = _mm256_setzero_si256(); + // Iterate over the input (u), one registerful at a time. + for (int j = 0; j < num_in;) { + __m256i inputs = + _mm256_loadu_si256(reinterpret_cast(u + j)); + // Inputs are processed in groups of kNumInputsPerGroup, replicated + // kNumInputGroups times. + for (int ig = 0; ig < kNumInputGroups && j < num_in; + ++ig, j += kNumInputsPerGroup) { + // Replicate the low 32 bits (4 inputs) 8 times. + __m256i rep_input = + _mm256_broadcastd_epi32(_mm256_castsi256_si128(inputs)); + // Rotate the inputs in groups of 4, so the next 4 inputs are ready. + inputs = _mm256_permutevar8x32_epi32(inputs, shift_id); + __m256i weights, reps; + // Mul-add, with horizontal add of the 4 inputs to each of the results. + MultiplyGroup(rep_input, ones, wi, weights, reps, result0); + } + } + ExtractResults(result0, shift_id, wi, scales, num_out, v); +} +#else +namespace tesseract { +#endif // __AVX2__ + +IntSimdMatrixAVX2::IntSimdMatrixAVX2() { +#ifdef __AVX2__ + num_outputs_per_register_ = kNumOutputsPerRegister; + max_output_registers_ = kMaxOutputRegisters; + num_inputs_per_register_ = kNumInputsPerRegister; + num_inputs_per_group_ = kNumInputsPerGroup; + num_input_groups_ = kNumInputGroups; + partial_funcs_ = {PartialMatrixDotVector64, PartialMatrixDotVector32, + PartialMatrixDotVector16, PartialMatrixDotVector8}; +#endif // __AVX2__ +} + +} // namespace tesseract. diff --git a/arch/intsimdmatrixavx2.h b/arch/intsimdmatrixavx2.h new file mode 100644 index 0000000000..280bf2f05d --- /dev/null +++ b/arch/intsimdmatrixavx2.h @@ -0,0 +1,33 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrixavx2.h +// Description: AVX2 implementation of 8-bit int SIMD matrix multiply.
+// Author: Ray Smith +// Created: Wed Aug 16 10:21:42 PST 2017 +// +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// +#ifndef TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_ +#define TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_ + +#include "intsimdmatrix.h" + +namespace tesseract { + +// AVX2 implementation of IntSimdMatrix. +class IntSimdMatrixAVX2 : public IntSimdMatrix { + public: + IntSimdMatrixAVX2(); +}; + +} // namespace tesseract + +#endif // TESSERACT_ARCH_INTSIMDMATRIXAVX2_H_ diff --git a/arch/intsimdmatrixsse.cpp b/arch/intsimdmatrixsse.cpp new file mode 100644 index 0000000000..9e8a7ef1ec --- /dev/null +++ b/arch/intsimdmatrixsse.cpp @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrixsse.cpp +// Description: SSE implementation of 8-bit int SIMD matrix multiply.
+// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#include "intsimdmatrixsse.h" + +#include +#include +#include "dotproductsse.h" + +namespace tesseract { + +#ifdef __SSE4_1__ +// Computes part of matrix.vector v = Wu. Computes 1 result. +static void PartialMatrixDotVector1(const int8_t* wi, const double* scales, + const int8_t* u, int num_in, int num_out, + double* v) { + int total = IntDotProductSSE(u, wi, num_in); + // Add in the bias and correct for integer values. + *v = (static_cast(total) / MAX_INT8 + wi[num_in]) * *scales; +} +#endif // __SSE4_1__ + +IntSimdMatrixSSE::IntSimdMatrixSSE() { +#ifdef __SSE4_1__ + partial_funcs_ = {PartialMatrixDotVector1}; +#endif // __SSE4_1__ +} + +} // namespace tesseract. diff --git a/arch/intsimdmatrixsse.h b/arch/intsimdmatrixsse.h new file mode 100644 index 0000000000..9ca2c89062 --- /dev/null +++ b/arch/intsimdmatrixsse.h @@ -0,0 +1,33 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrixsse.h +// Description: SSE implementation of 8-bit int SIMD matrix multiply. +// Author: Ray Smith +// Created: Tue Aug 23 13:58:21 PST 2017 +// +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+/////////////////////////////////////////////////////////////////////// +#ifndef TESSERACT_ARCH_INTSIMDMATRIXSSE_H_ +#define TESSERACT_ARCH_INTSIMDMATRIXSSE_H_ + +#include "intsimdmatrix.h" + +namespace tesseract { + +// SSE implementation of IntSimdMatrix. +class IntSimdMatrixSSE : public IntSimdMatrix { + public: + IntSimdMatrixSSE(); +}; + +} // namespace tesseract + +#endif // TESSERACT_ARCH_INTSIMDMATRIXSSE_H_ diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h index 5c06871b46..7eabf775bc 100644 --- a/ccstruct/matrix.h +++ b/ccstruct/matrix.h @@ -81,10 +81,12 @@ class GENERIC_2D_ARRAY { memcpy(array_, src.array_, num_elements() * sizeof(array_[0])); } - // Reallocate the array to the given size. Does not keep old data, but does + // Reallocates the array to the given size. Does not keep old data, but does // not initialize the array either. - void ResizeNoInit(int size1, int size2) { - int new_size = size1 * size2; + // The allocated memory is expanded on the end by pad, allowing deliberate + // access beyond the bounds of the array. + void ResizeNoInit(int size1, int size2, int pad = 0) { + int new_size = size1 * size2 + pad; if (new_size > size_allocated_) { delete [] array_; array_ = new T[new_size]; @@ -92,6 +94,8 @@ class GENERIC_2D_ARRAY { } dim1_ = size1; dim2_ = size2; + // Fill the padding data so it isn't uninitialized. + for (int i = size1 * size2; i < new_size; ++i) array_[i] = empty_; } // Reallocate the array to the given size. Does not keep old data. diff --git a/configure.ac b/configure.ac index a6e2ae5e63..59bf1cf5d2 100644 --- a/configure.ac +++ b/configure.ac @@ -117,6 +117,7 @@ esac ## Checks for supported compiler options.
AM_CONDITIONAL([AVX_OPT], false) +AM_CONDITIONAL([AVX2_OPT], false) AM_CONDITIONAL([SSE41_OPT], false) AX_CHECK_COMPILE_FLAG([-mavx], [avx=true], [avx=false]) @@ -124,6 +125,11 @@ if $avx; then AM_CONDITIONAL([AVX_OPT], true) fi +AX_CHECK_COMPILE_FLAG([-mavx2], [avx2=true], [avx2=false]) +if $avx2; then + AM_CONDITIONAL([AVX2_OPT], true) +fi + AX_CHECK_COMPILE_FLAG([-msse4.1], [sse41=true], [sse41=false]) if $sse41; then AM_CONDITIONAL([SSE41_OPT], true) diff --git a/lstm/lstm.cpp b/lstm/lstm.cpp index 2660f877b9..7d804a198a 100644 --- a/lstm/lstm.cpp +++ b/lstm/lstm.cpp @@ -260,7 +260,9 @@ void LSTM::Forward(bool debug, const NetworkIO& input, if (softmax_ != NULL) { softmax_output.Init(no_, scratch); ZeroVector(no_, softmax_output); - if (input.int_mode()) int_output.Resize2d(true, 1, ns_, scratch); + int rounded_softmax_inputs = gate_weights_[CI].RoundInputs(ns_); + if (input.int_mode()) + int_output.Resize2d(true, 1, rounded_softmax_inputs, scratch); softmax_->SetupForward(input, NULL); } NetworkScratch::FloatVec curr_input; @@ -364,7 +366,7 @@ void LSTM::Forward(bool debug, const NetworkIO& input, if (IsTraining()) state_.WriteTimeStep(t, curr_state); if (softmax_ != NULL) { if (input.int_mode()) { - int_output->WriteTimeStep(0, curr_output); + int_output->WriteTimeStepPart(0, 0, ns_, curr_output); softmax_->ForwardTimeStep(NULL, int_output->i(0), t, softmax_output); } else { softmax_->ForwardTimeStep(curr_output, NULL, t, softmax_output); @@ -720,7 +722,8 @@ void LSTM::PrintDW() { // Resizes forward data to cope with an input image of the given width. 
void LSTM::ResizeForward(const NetworkIO& input) { - source_.Resize(input, na_); + int rounded_inputs = gate_weights_[CI].RoundInputs(na_); + source_.Resize(input, rounded_inputs); which_fg_.ResizeNoInit(input.Width(), ns_); if (IsTraining()) { state_.ResizeFloat(input, ns_); diff --git a/lstm/networkio.cpp b/lstm/networkio.cpp index dfa46aa4cc..53db3aee53 100644 --- a/lstm/networkio.cpp +++ b/lstm/networkio.cpp @@ -30,12 +30,17 @@ const float kMinCertainty = -20.0f; // Probability corresponding to kMinCertainty. const float kMinProb = exp(kMinCertainty); +// Holds the optimal integer multiplier for this machine. +// This is a leaked, lazily initialized singleton, and is used for computing +// padding to apply to i_ for SIMD use. +IntSimdMatrix* NetworkIO::multiplier_ = nullptr; + // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim. void NetworkIO::Resize2d(bool int_mode, int width, int num_features) { stride_map_ = StrideMap(); int_mode_ = int_mode; if (int_mode_) { - i_.ResizeNoInit(width, num_features); + i_.ResizeNoInit(width, num_features, GetPadding(num_features)); } else { f_.ResizeNoInit(width, num_features); } @@ -51,7 +56,7 @@ void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map, stride_map_ = stride_map; int_mode_ = int_mode; if (int_mode_) { - i_.ResizeNoInit(stride_map.Width(), num_features); + i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features)); } else { f_.ResizeNoInit(stride_map.Width(), num_features); } @@ -976,4 +981,17 @@ void NetworkIO::ClipVector(int t, float range) { v[i] = ClipToRange(v[i], -range, range); } +// Returns the padding required for the given number of features in order +// for the SIMD operations to be safe. 
+/* static */ +int NetworkIO::GetPadding(int num_features) { + if (multiplier_ == nullptr) + multiplier_ = IntSimdMatrix::GetFastestMultiplier(); + int pad = 0; + if (multiplier_ != nullptr) { + pad = multiplier_->RoundInputs(num_features) - num_features; + } + return pad; +} + } // namespace tesseract. diff --git a/lstm/networkio.h b/lstm/networkio.h index 5082269917..b56b2c595c 100644 --- a/lstm/networkio.h +++ b/lstm/networkio.h @@ -327,6 +327,10 @@ class NetworkIO { } private: + // Returns the padding required for the given number of features in order + // for the SIMD operations to be safe. + static int GetPadding(int num_features); + // Choice of float vs 8 bit int for data. GENERIC_2D_ARRAY f_; GENERIC_2D_ARRAY i_; @@ -334,6 +338,10 @@ class NetworkIO { bool int_mode_; // Stride for 2d input data. StrideMap stride_map_; + // Holds the optimal integer multiplier for this machine. + // This is a leaked, lazily initialized singleton, and is used for computing + // padding to apply to i_ for SIMD use. + static IntSimdMatrix* multiplier_; }; } // namespace tesseract. diff --git a/lstm/weightmatrix.cpp b/lstm/weightmatrix.cpp index ea41413bb8..1103ea6a4e 100644 --- a/lstm/weightmatrix.cpp +++ b/lstm/weightmatrix.cpp @@ -20,6 +20,7 @@ #include "dotproductavx.h" #include "dotproductsse.h" +#include "intsimdmatrix.h" #include "simddetect.h" #include "statistc.h" #include "tprintf.h" @@ -61,10 +62,7 @@ int WeightMatrix::InitWeightsFloat(int no, int ni, bool use_adam, // the old weight matrix entries for each output from code_map[output] where // non-negative, and uses the mean (over all outputs) of the existing weights // for all outputs with negative code_map entries. Returns the new number of -// weights. Can be used to change the character set addressed by an output -// softmax. -// TODO(rays) A RemapInputs would also be useful, so a change can be made -// in the middle of a network. +// weights. 
int WeightMatrix::RemapOutputs(const std::vector& code_map) { GENERIC_2D_ARRAY old_wf(wf_); int old_no = wf_.dim1(); @@ -114,6 +112,8 @@ void WeightMatrix::ConvertToInt() { } wf_.Resize(1, 1, 0.0); int_mode_ = true; + multiplier_.reset(IntSimdMatrix::GetFastestMultiplier()); + if (multiplier_ != nullptr) multiplier_->Init(wi_); } // Allocates any needed memory for running Backward, and zeroes the deltas, @@ -165,6 +165,8 @@ bool WeightMatrix::DeSerialize(bool training, TFile* fp) { if (int_mode_) { if (!wi_.DeSerialize(fp)) return false; if (!scales_.DeSerialize(fp)) return false; + multiplier_.reset(IntSimdMatrix::GetFastestMultiplier()); + if (multiplier_ != nullptr) multiplier_->Init(wi_); } else { if (!wf_.DeSerialize(fp)) return false; if (training) { @@ -212,19 +214,8 @@ void WeightMatrix::MatrixDotVector(const double* u, double* v) const { void WeightMatrix::MatrixDotVector(const inT8* u, double* v) const { ASSERT_HOST(int_mode_); - int num_out = wi_.dim1(); - int num_in = wi_.dim2() - 1; - for (int i = 0; i < num_out; ++i) { - const inT8* Wi = wi_[i]; - int total = 0; - if (SIMDDetect::IsSSEAvailable()) { - total = IntDotProductSSE(u, Wi, num_in); - } else { - for (int j = 0; j < num_in; ++j) total += Wi[j] * u[j]; - } - // Add in the bias and correct for integer values. 
- v[i] = (static_cast(total) / MAX_INT8 + Wi[num_in]) * scales_[i]; - } + ASSERT_HOST(multiplier_ != nullptr); + multiplier_->MatrixDotVector(wi_, scales_, u, v); } // MatrixDotVector for peep weights, MultiplyAccumulate adds the diff --git a/lstm/weightmatrix.h b/lstm/weightmatrix.h index 0805e0b253..557c979109 100644 --- a/lstm/weightmatrix.h +++ b/lstm/weightmatrix.h @@ -19,7 +19,9 @@ #ifndef TESSERACT_LSTM_WEIGHTMATRIX_H_ #define TESSERACT_LSTM_WEIGHTMATRIX_H_ +#include #include "genericvector.h" +#include "intsimdmatrix.h" #include "matrix.h" #include "tprintf.h" @@ -74,10 +76,7 @@ class WeightMatrix { // the old weight matrix entries for each output from code_map[output] where // non-negative, and uses the mean (over all outputs) of the existing weights // for all outputs with negative code_map entries. Returns the new number of - // weights. Can be used to change the character set addressed by an output - // softmax. - // TODO(rays) A RemapInputs would also be useful, so a change can be made - // in the middle of a network. + // weights. int RemapOutputs(const std::vector& code_map); // Converts a float network to an int network. Each set of input weights that @@ -88,6 +87,12 @@ class WeightMatrix { // Store a multiplicative scale factor (as a float) that will reproduce // the original value, subject to rounding errors. void ConvertToInt(); + // Returns the size rounded up to an internal factor used by the SIMD + // implementation for its input. + int RoundInputs(int size) const { + if (multiplier_ == nullptr) return size; + return multiplier_->RoundInputs(size); + } // Accessors. bool is_int_mode() const { @@ -184,6 +189,8 @@ class WeightMatrix { // Iff use_adam_, the sum of squares of dw_. The number of samples is // given to Update(). Serialized iff use_adam_. GENERIC_2D_ARRAY dw_sq_sum_; + // Holds the optimal integer multiplier for this machine. + std::unique_ptr multiplier_; }; } // namespace tesseract. 
diff --git a/training/combine_lang_model b/training/combine_lang_model new file mode 100755 index 0000000000..0a24f6fa72 --- /dev/null +++ b/training/combine_lang_model @@ -0,0 +1,228 @@ +#! /bin/sh + +# combine_lang_model - temporary wrapper script for .libs/combine_lang_model +# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1 +# +# The combine_lang_model program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='s/\([`"$\\]\)/\\\1/g' + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. 
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command="(cd /usr/local/google/home/rays/opensrc/git/tesseract/training; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=/usr/local/google/home/rays/bin:/usr/lib/google-golang/bin:/usr/local/buildtools/java/jdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/google/home/rays/bin; export PATH; g++ -g -O2 -std=c++11 -o \$progdir/\$file combine_lang_model.o ./.libs/libtesseract_training.a ./.libs/libtesseract_tessopt.a -licui18n -licuuc -licudata ../api/.libs/libtesseract.so -lpthread -fopenmp -Wl,-rpath -Wl,/usr/local/google/home/rays/opensrc/git/tesseract/api/.libs)" + +# This environment variable determines our operation mode. +if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then + # install mode needs the following variables: + generated_by_libtool_version='2.4.2' + notinst_deplibs=' ../api/libtesseract.la' +else + # When we are sourced in execute mode, $file and $ECHO are already set. + if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then + file="$0" + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + ECHO="printf %s\\n" + fi + +# Very basic option parsing. 
These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ which is used only on +# windows platforms, and (c) all begin with the string --lt- +# (application programs are unlikely to have options which match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's ../libtool value, followed by no. +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=$0 + shift + for lt_opt + do + case "$lt_opt" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'` + test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=. + lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'` + cat "$lt_dump_D/$lt_dump_F" + exit 0 + ;; + --lt-*) + $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n "$lt_option_debug"; then + echo "combine_lang_model:combine_lang_model:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1" 1>&2 + fi +} + +# Used when --lt-debug. 
Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + $ECHO "combine_lang_model:combine_lang_model:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg" + lt_dump_args_N=`expr $lt_dump_args_N + 1` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ + + if test -n "$lt_option_debug"; then + $ECHO "combine_lang_model:combine_lang_model:${LINENO}: newargv[0]: $progdir/$program" 1>&2 + func_lt_dump_args ${1+"$@"} 1>&2 + fi + exec "$progdir/$program" ${1+"$@"} + + $ECHO "$0: cannot exec $program $*" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from $@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + case " $* " in + *\ --lt-*) + for lt_wr_arg + do + case $lt_wr_arg in + --lt-*) ;; + *) set x "$@" "$lt_wr_arg"; shift;; + esac + shift + done ;; + esac + func_exec_program_core ${1+"$@"} +} + + # Parse options + func_parse_lt_options "$0" ${1+"$@"} + + # Find the directory that this script lives in. + thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'` + test "x$thisdir" = "x$file" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'` + while test -n "$file"; do + destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'` + + # If there was a directory component, then change thisdir. + if test "x$destdir" != "x$file"; then + case "$destdir" in + [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;; + *) thisdir="$thisdir/$destdir" ;; + esac + fi + + file=`$ECHO "$file" | /bin/sed 's%^.*/%%'` + file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no + if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then + # special case for '.' 
+ if test "$thisdir" = "."; then + thisdir=`pwd` + fi + # remove .libs from thisdir + case "$thisdir" in + *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;; + .libs ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=`cd "$thisdir" && pwd` + test -n "$absdir" && thisdir="$absdir" + + program=lt-'combine_lang_model' + progdir="$thisdir/.libs" + + if test ! -f "$progdir/$program" || + { file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \ + test "X$file" != "X$progdir/$program"; }; then + + file="$$-$program" + + if test ! -d "$progdir"; then + mkdir "$progdir" + else + rm -f "$progdir/$file" + fi + + # relink executable if necessary + if test -n "$relink_command"; then + if relink_command_output=`eval $relink_command 2>&1`; then : + else + printf %s\n "$relink_command_output" >&2 + rm -f "$progdir/$file" + exit 1 + fi + fi + + mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null || + { rm -f "$progdir/$program"; + mv -f "$progdir/$file" "$progdir/$program"; } + rm -f "$progdir/$file" + fi + + if test -f "$progdir/$program"; then + if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then + # Run the actual program with our arguments. + func_exec_program ${1+"$@"} + fi + else + # The program doesn't exist. + $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2 + $ECHO "This script is just a wrapper for $program." 1>&2 + $ECHO "See the libtool documentation for more information." 1>&2 + exit 1 + fi +fi diff --git a/training/lstmeval b/training/lstmeval new file mode 100755 index 0000000000..86c61c226c --- /dev/null +++ b/training/lstmeval @@ -0,0 +1,228 @@ +#! /bin/sh + +# lstmeval - temporary wrapper script for .libs/lstmeval +# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1 +# +# The lstmeval program cannot be directly executed until all the libtool +# libraries that it depends on are installed. 
+# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='s/\([`"$\\]\)/\\\1/g' + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command="(cd /usr/local/google/home/rays/opensrc/git/tesseract/training; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=/usr/local/google/home/rays/bin:/usr/lib/google-golang/bin:/usr/local/buildtools/java/jdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/google/home/rays/bin; export PATH; g++ -g -O2 -std=c++11 -o \$progdir/\$file lstmeval.o ./.libs/libtesseract_training.a ./.libs/libtesseract_tessopt.a -licuuc -licudata ../api/.libs/libtesseract.so -L/usr/local/lib /usr/local/lib/liblept.so -lpthread -fopenmp -Wl,-rpath 
-Wl,/usr/local/google/home/rays/opensrc/git/tesseract/api/.libs)" + +# This environment variable determines our operation mode. +if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then + # install mode needs the following variables: + generated_by_libtool_version='2.4.2' + notinst_deplibs=' ../api/libtesseract.la' +else + # When we are sourced in execute mode, $file and $ECHO are already set. + if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then + file="$0" + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + ECHO="printf %s\\n" + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ which is used only on +# windows platforms, and (c) all begin with the string --lt- +# (application programs are unlikely to have options which match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's ../libtool value, followed by no. +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=$0 + shift + for lt_opt + do + case "$lt_opt" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'` + test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=. + lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'` + cat "$lt_dump_D/$lt_dump_F" + exit 0 + ;; + --lt-*) + $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n "$lt_option_debug"; then + echo "lstmeval:lstmeval:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1" 1>&2 + fi +} + +# Used when --lt-debug. 
Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + $ECHO "lstmeval:lstmeval:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg" + lt_dump_args_N=`expr $lt_dump_args_N + 1` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ + + if test -n "$lt_option_debug"; then + $ECHO "lstmeval:lstmeval:${LINENO}: newargv[0]: $progdir/$program" 1>&2 + func_lt_dump_args ${1+"$@"} 1>&2 + fi + exec "$progdir/$program" ${1+"$@"} + + $ECHO "$0: cannot exec $program $*" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from $@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + case " $* " in + *\ --lt-*) + for lt_wr_arg + do + case $lt_wr_arg in + --lt-*) ;; + *) set x "$@" "$lt_wr_arg"; shift;; + esac + shift + done ;; + esac + func_exec_program_core ${1+"$@"} +} + + # Parse options + func_parse_lt_options "$0" ${1+"$@"} + + # Find the directory that this script lives in. + thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'` + test "x$thisdir" = "x$file" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'` + while test -n "$file"; do + destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'` + + # If there was a directory component, then change thisdir. + if test "x$destdir" != "x$file"; then + case "$destdir" in + [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;; + *) thisdir="$thisdir/$destdir" ;; + esac + fi + + file=`$ECHO "$file" | /bin/sed 's%^.*/%%'` + file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no + if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then + # special case for '.' 
+ if test "$thisdir" = "."; then + thisdir=`pwd` + fi + # remove .libs from thisdir + case "$thisdir" in + *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;; + .libs ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=`cd "$thisdir" && pwd` + test -n "$absdir" && thisdir="$absdir" + + program=lt-'lstmeval' + progdir="$thisdir/.libs" + + if test ! -f "$progdir/$program" || + { file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \ + test "X$file" != "X$progdir/$program"; }; then + + file="$$-$program" + + if test ! -d "$progdir"; then + mkdir "$progdir" + else + rm -f "$progdir/$file" + fi + + # relink executable if necessary + if test -n "$relink_command"; then + if relink_command_output=`eval $relink_command 2>&1`; then : + else + printf %s\n "$relink_command_output" >&2 + rm -f "$progdir/$file" + exit 1 + fi + fi + + mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null || + { rm -f "$progdir/$program"; + mv -f "$progdir/$file" "$progdir/$program"; } + rm -f "$progdir/$file" + fi + + if test -f "$progdir/$program"; then + if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then + # Run the actual program with our arguments. + func_exec_program ${1+"$@"} + fi + else + # The program doesn't exist. + $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2 + $ECHO "This script is just a wrapper for $program." 1>&2 + $ECHO "See the libtool documentation for more information." 1>&2 + exit 1 + fi +fi diff --git a/training/lstmtraining b/training/lstmtraining new file mode 100755 index 0000000000..90e9c2e49a --- /dev/null +++ b/training/lstmtraining @@ -0,0 +1,228 @@ +#! /bin/sh + +# lstmtraining - temporary wrapper script for .libs/lstmtraining +# Generated by libtool (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1 +# +# The lstmtraining program cannot be directly executed until all the libtool +# libraries that it depends on are installed. 
+# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +sed_quote_subst='s/\([`"$\\]\)/\\\1/g' + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command="(cd /usr/local/google/home/rays/opensrc/git/tesseract/training; { test -z \"\${LIBRARY_PATH+set}\" || unset LIBRARY_PATH || { LIBRARY_PATH=; export LIBRARY_PATH; }; }; { test -z \"\${COMPILER_PATH+set}\" || unset COMPILER_PATH || { COMPILER_PATH=; export COMPILER_PATH; }; }; { test -z \"\${GCC_EXEC_PREFIX+set}\" || unset GCC_EXEC_PREFIX || { GCC_EXEC_PREFIX=; export GCC_EXEC_PREFIX; }; }; { test -z \"\${LD_RUN_PATH+set}\" || unset LD_RUN_PATH || { LD_RUN_PATH=; export LD_RUN_PATH; }; }; { test -z \"\${LD_LIBRARY_PATH+set}\" || unset LD_LIBRARY_PATH || { LD_LIBRARY_PATH=; export LD_LIBRARY_PATH; }; }; PATH=/usr/local/google/home/rays/bin:/usr/lib/google-golang/bin:/usr/local/buildtools/java/jdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/google/home/rays/bin; export PATH; g++ -g -O2 -std=c++11 -o \$progdir/\$file lstmtraining.o ./.libs/libtesseract_training.a ./.libs/libtesseract_tessopt.a -licui18n -licuuc -licudata ../api/.libs/libtesseract.so -L/usr/local/lib /usr/local/lib/liblept.so -lpthread -fopenmp -Wl,-rpath 
-Wl,/usr/local/google/home/rays/opensrc/git/tesseract/api/.libs)" + +# This environment variable determines our operation mode. +if test "$libtool_install_magic" = "%%%MAGIC variable%%%"; then + # install mode needs the following variables: + generated_by_libtool_version='2.4.2' + notinst_deplibs=' ../api/libtesseract.la' +else + # When we are sourced in execute mode, $file and $ECHO are already set. + if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then + file="$0" + +# A function that is used when there is no print builtin or printf. +func_fallback_echo () +{ + eval 'cat <<_LTECHO_EOF +$1 +_LTECHO_EOF' +} + ECHO="printf %s\\n" + fi + +# Very basic option parsing. These options are (a) specific to +# the libtool wrapper, (b) are identical between the wrapper +# /script/ and the wrapper /executable/ which is used only on +# windows platforms, and (c) all begin with the string --lt- +# (application programs are unlikely to have options which match +# this pattern). +# +# There are only two supported options: --lt-debug and +# --lt-dump-script. There is, deliberately, no --lt-help. +# +# The first argument to this parsing function should be the +# script's ../libtool value, followed by no. +lt_option_debug= +func_parse_lt_options () +{ + lt_script_arg0=$0 + shift + for lt_opt + do + case "$lt_opt" in + --lt-debug) lt_option_debug=1 ;; + --lt-dump-script) + lt_dump_D=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%/[^/]*$%%'` + test "X$lt_dump_D" = "X$lt_script_arg0" && lt_dump_D=. + lt_dump_F=`$ECHO "X$lt_script_arg0" | /bin/sed -e 's/^X//' -e 's%^.*/%%'` + cat "$lt_dump_D/$lt_dump_F" + exit 0 + ;; + --lt-*) + $ECHO "Unrecognized --lt- option: '$lt_opt'" 1>&2 + exit 1 + ;; + esac + done + + # Print the debug banner immediately: + if test -n "$lt_option_debug"; then + echo "lstmtraining:lstmtraining:${LINENO}: libtool wrapper (GNU libtool) 2.4.2 Debian-2.4.2-1.7ubuntu1" 1>&2 + fi +} + +# Used when --lt-debug. 
Prints its arguments to stdout +# (redirection is the responsibility of the caller) +func_lt_dump_args () +{ + lt_dump_args_N=1; + for lt_arg + do + $ECHO "lstmtraining:lstmtraining:${LINENO}: newargv[$lt_dump_args_N]: $lt_arg" + lt_dump_args_N=`expr $lt_dump_args_N + 1` + done +} + +# Core function for launching the target application +func_exec_program_core () +{ + + if test -n "$lt_option_debug"; then + $ECHO "lstmtraining:lstmtraining:${LINENO}: newargv[0]: $progdir/$program" 1>&2 + func_lt_dump_args ${1+"$@"} 1>&2 + fi + exec "$progdir/$program" ${1+"$@"} + + $ECHO "$0: cannot exec $program $*" 1>&2 + exit 1 +} + +# A function to encapsulate launching the target application +# Strips options in the --lt-* namespace from $@ and +# launches target application with the remaining arguments. +func_exec_program () +{ + case " $* " in + *\ --lt-*) + for lt_wr_arg + do + case $lt_wr_arg in + --lt-*) ;; + *) set x "$@" "$lt_wr_arg"; shift;; + esac + shift + done ;; + esac + func_exec_program_core ${1+"$@"} +} + + # Parse options + func_parse_lt_options "$0" ${1+"$@"} + + # Find the directory that this script lives in. + thisdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'` + test "x$thisdir" = "x$file" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=`ls -ld "$file" | /bin/sed -n 's/.*-> //p'` + while test -n "$file"; do + destdir=`$ECHO "$file" | /bin/sed 's%/[^/]*$%%'` + + # If there was a directory component, then change thisdir. + if test "x$destdir" != "x$file"; then + case "$destdir" in + [\\/]* | [A-Za-z]:[\\/]*) thisdir="$destdir" ;; + *) thisdir="$thisdir/$destdir" ;; + esac + fi + + file=`$ECHO "$file" | /bin/sed 's%^.*/%%'` + file=`ls -ld "$thisdir/$file" | /bin/sed -n 's/.*-> //p'` + done + + # Usually 'no', except on cygwin/mingw when embedded into + # the cwrapper. + WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=no + if test "$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR" = "yes"; then + # special case for '.' 
+ if test "$thisdir" = "."; then + thisdir=`pwd` + fi + # remove .libs from thisdir + case "$thisdir" in + *[\\/].libs ) thisdir=`$ECHO "$thisdir" | /bin/sed 's%[\\/][^\\/]*$%%'` ;; + .libs ) thisdir=. ;; + esac + fi + + # Try to get the absolute directory name. + absdir=`cd "$thisdir" && pwd` + test -n "$absdir" && thisdir="$absdir" + + program=lt-'lstmtraining' + progdir="$thisdir/.libs" + + if test ! -f "$progdir/$program" || + { file=`ls -1dt "$progdir/$program" "$progdir/../$program" 2>/dev/null | /bin/sed 1q`; \ + test "X$file" != "X$progdir/$program"; }; then + + file="$$-$program" + + if test ! -d "$progdir"; then + mkdir "$progdir" + else + rm -f "$progdir/$file" + fi + + # relink executable if necessary + if test -n "$relink_command"; then + if relink_command_output=`eval $relink_command 2>&1`; then : + else + printf %s\n "$relink_command_output" >&2 + rm -f "$progdir/$file" + exit 1 + fi + fi + + mv -f "$progdir/$file" "$progdir/$program" 2>/dev/null || + { rm -f "$progdir/$program"; + mv -f "$progdir/$file" "$progdir/$program"; } + rm -f "$progdir/$file" + fi + + if test -f "$progdir/$program"; then + if test "$libtool_execute_magic" != "%%%MAGIC variable%%%"; then + # Run the actual program with our arguments. + func_exec_program ${1+"$@"} + fi + else + # The program doesn't exist. + $ECHO "$0: error: \`$progdir/$program' does not exist" 1>&2 + $ECHO "This script is just a wrapper for $program." 1>&2 + $ECHO "See the libtool documentation for more information." 
1>&2 + exit 1 + fi +fi diff --git a/unittest/Makefile.am b/unittest/Makefile.am index beddae0701..2088c83443 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -28,22 +28,25 @@ AM_CPPFLAGS += -isystem $(top_srcdir)/googletest/googletest/include check_PROGRAMS = \ apiexample_test \ + intsimdmatrix_test \ tesseracttests \ matrix_test TESTS = $(check_PROGRAMS) #List of source files needed to build the executable: + +apiexample_test_SOURCES = apiexample_test.cc +apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS) -tesseracttests_SOURCES = ../tests/tesseracttests.cpp -tesseracttests_LDADD = $(GTEST_LIBS) +intsimdmatrix_test_SOURCES = intsimdmatrix_test.cc +intsimdmatrix_test_LDADD = $(GTEST_LIBS) matrix_test_SOURCES = matrix_test.cc matrix_test_LDADD = $(GTEST_LIBS) - -apiexample_test_SOURCES = apiexample_test.cc -#apiexample_test_LDFLAGS = -static -apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS) + +tesseracttests_SOURCES = ../tests/tesseracttests.cpp +tesseracttests_LDADD = $(GTEST_LIBS) if USING_MULTIPLELIBS apiexample_test_LDADD = \ @@ -60,6 +63,7 @@ apiexample_test_LDADD += $(GTEST_LIBS) # for windows if T_WIN apiexample_test_LDADD += -lws2_32 +intsimdmatrix_test_LDADD += -lws2_32 matrix_test_LDADD += -lws2_32 tesseracttests_LDADD += -lws2_32 diff --git a/unittest/include_gunit.h b/unittest/include_gunit.h new file mode 100644 index 0000000000..42f934bedd --- /dev/null +++ b/unittest/include_gunit.h @@ -0,0 +1,17 @@ +// (C) Copyright 2017, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// Portability include to match the Google test environment. +#ifndef TESSERACT_UNITTEST_INCLUDE_GUNIT_H_ +#define TESSERACT_UNITTEST_INCLUDE_GUNIT_H_ + +#include "gtest/gtest.h" + +#endif // TESSERACT_UNITTEST_INCLUDE_GUNIT_H_ diff --git a/unittest/intsimdmatrix_test.cc b/unittest/intsimdmatrix_test.cc new file mode 100644 index 0000000000..08d409f70b --- /dev/null +++ b/unittest/intsimdmatrix_test.cc @@ -0,0 +1,105 @@ +/////////////////////////////////////////////////////////////////////// +// File: intsimdmatrix_test.cc +// Author: rays@google.com (Ray Smith) +// +// Copyright 2017 Google Inc. All Rights Reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/////////////////////////////////////////////////////////////////////// + +#include "intsimdmatrix.h" +#include +#include "genericvector.h" +#include "include_gunit.h" +#include "intsimdmatrixavx2.h" +#include "intsimdmatrixsse.h" +#include "simddetect.h" +#include "tprintf.h" + +namespace tesseract { +namespace { + +class IntSimdMatrixTest : public ::testing::Test { + protected: + // Makes a random weights matrix of the given size. 
+ GENERIC_2D_ARRAY InitRandom(int no, int ni) { + GENERIC_2D_ARRAY a(no, ni, 0); + for (int i = 0; i < no; ++i) { + for (int j = 0; j < ni; ++j) { + a(i, j) = static_cast(random_.SignedRand(MAX_INT8)); + } + } + return a; + } + // Makes a random input vector of the given size, with rounding up. + std::vector RandomVector(int size, const IntSimdMatrix& matrix) { + int rounded_size = matrix.RoundInputs(size); + std::vector v(rounded_size, 0); + for (int i = 0; i < size; ++i) { + v[i] = static_cast(random_.SignedRand(MAX_INT8)); + } + return v; + } + // Makes a random scales vector of the given size. + GenericVector RandomScales(int size) { + GenericVector v(size, 0.0); + for (int i = 0; i < size; ++i) { + v[i] = 1.0 + random_.SignedRand(1.0); + } + return v; + } + // Tests a range of sizes and compares the results against the base_ version. + void ExpectEqualResults(IntSimdMatrix* matrix) { + for (int num_out = 1; num_out < 130; ++num_out) { + for (int num_in = 1; num_in < 130; ++num_in) { + GENERIC_2D_ARRAY w = InitRandom(num_out, num_in + 1); + matrix->Init(w); + std::vector u = RandomVector(num_in, *matrix); + GenericVector scales = RandomScales(num_out); + std::vector base_result(num_out); + base_.MatrixDotVector(w, scales, u.data(), base_result.data()); + std::vector test_result(num_out); + matrix->MatrixDotVector(w, scales, u.data(), test_result.data()); + for (int i = 0; i < num_out; ++i) + EXPECT_FLOAT_EQ(base_result[i], test_result[i]) << "i=" << i; + } + } + } + + TRand random_; + IntSimdMatrix base_; +}; + +// Tests that the SSE implementation gets the same result as the vanilla. +TEST_F(IntSimdMatrixTest, SSE) { + if (SIMDDetect::IsSSEAvailable()) { + LOG(INFO) << "SSE found! Continuing..."; + } else { + LOG(INFO) << "No SSE found! Not Tested!"; + return; + } + std::unique_ptr matrix(new IntSimdMatrixSSE()); + ExpectEqualResults(matrix.get()); +} + +// Tests that the AVX2 implementation gets the same result as the vanilla. 
+TEST_F(IntSimdMatrixTest, AVX2) { + if (SIMDDetect::IsAVX2Available()) { + LOG(INFO) << "AVX2 found! Continuing..."; + } else { + LOG(INFO) << "No AVX2 found! Not Tested!"; + return; + } + std::unique_ptr matrix(new IntSimdMatrixAVX2()); + ExpectEqualResults(matrix.get()); +} + +} // namespace +} // namespace tesseract