Skip to content

Commit

Permalink
Add config variable for selection of dot product function
Browse files Browse the repository at this point in the history
Also add a C++ implementation with more aggressive compiler options
which is optimized for the CPU where the software was built.

It is now possible to select the function used for the dot product
with -c dotproduct=FUNCTION where FUNCTION can be one of those values:

* auto      selection based on detected hardware (default)
* generic   C++ code with default compiler options
* native    C++ code optimized for build host
* avx       optimized code for AVX
* sse       optimized code for SSE

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed Nov 30, 2018
1 parent b527b37 commit f0a4d04
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 32 deletions.
1 change: 1 addition & 0 deletions src/api/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ libtesseract_la_LIBADD = \
../classify/libtesseract_classify.la \
../dict/libtesseract_dict.la \
../arch/libtesseract_arch.la \
../arch/libtesseract_native.la \
../arch/libtesseract_avx.la \
../arch/libtesseract_avx2.la \
../arch/libtesseract_sse.la \
Expand Down
6 changes: 4 additions & 2 deletions src/api/tesseractmain.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
/**********************************************************************
* File: tesseractmain.cpp (Formerly tessedit.c)
* File: tesseractmain.cpp
* Description: Main program for merge of tess and editor.
* Author: Ray Smith
* Created: Tue Jan 07 15:21:46 GMT 1992
*
* (C) Copyright 1992, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -585,6 +584,9 @@ int main(int argc, char** argv) {

SetVariablesFromCLArgs(&api, argc, argv);

// SIMD settings might be overridden by config variable.
tesseract::SIMDDetect::Update();

if (list_langs) {
PrintLangsList(&api);
return EXIT_SUCCESS;
Expand Down
9 changes: 7 additions & 2 deletions src/arch/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ endif

pkginclude_HEADERS =

noinst_HEADERS = dotproductavx.h dotproductsse.h
noinst_HEADERS = dotproduct.h dotproductavx.h dotproductsse.h
noinst_HEADERS += intsimdmatrix.h intsimdmatrixavx2.h intsimdmatrixsse.h
noinst_HEADERS += simddetect.h

noinst_LTLIBRARIES = libtesseract_avx.la libtesseract_avx2.la libtesseract_sse.la
noinst_LTLIBRARIES = libtesseract_native.la
noinst_LTLIBRARIES += libtesseract_avx.la libtesseract_avx2.la
noinst_LTLIBRARIES += libtesseract_sse.la
noinst_LTLIBRARIES += libtesseract_arch.la

if AVX_OPT
Expand All @@ -27,6 +29,9 @@ if SSE41_OPT
libtesseract_sse_la_CXXFLAGS = -ffast-math -msse4.1
endif

libtesseract_native_la_CXXFLAGS = -O3 -ffast-math -march=native -mtune=native
libtesseract_native_la_SOURCES = dotproduct.cpp

libtesseract_arch_la_SOURCES = intsimdmatrix.cpp simddetect.cpp

libtesseract_avx_la_SOURCES = dotproductavx.cpp
Expand Down
28 changes: 28 additions & 0 deletions src/arch/dotproduct.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
///////////////////////////////////////////////////////////////////////
// File: dotproduct.cpp
// Description: Native dot product function.
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#include "dotproduct.h"

namespace tesseract {

// Returns the dot product of the vectors u and v, each holding n elements.
// The products are accumulated strictly in index order, starting from 0.0.
double DotProductNative(const double* u, const double* v, int n) {
  double sum = 0.0;
  int index = 0;
  while (index < n) {
    sum += u[index] * v[index];
    ++index;
  }
  return sum;
}

} // namespace tesseract
27 changes: 27 additions & 0 deletions src/arch/dotproduct.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
///////////////////////////////////////////////////////////////////////
// File: dotproduct.h
// Description: Native dot product function.
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_ARCH_DOTPRODUCT_H_
#define TESSERACT_ARCH_DOTPRODUCT_H_

namespace tesseract {

// Computes and returns the dot product of the n-vectors u and v.
double DotProductNative(const double* u, const double* v, int n);

} // namespace tesseract.

#endif // TESSERACT_ARCH_DOTPRODUCT_H_
85 changes: 85 additions & 0 deletions src/arch/simddetect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
///////////////////////////////////////////////////////////////////////

#include "simddetect.h"
#include "dotproduct.h"
#include "dotproductavx.h"
#include "dotproductsse.h"
#include "params.h" // for STRING_VAR
#include "tprintf.h" // for tprintf

#undef X86_BUILD
#if defined(__x86_64__) || defined(__i386__) || defined(_WIN32)
Expand All @@ -34,6 +39,21 @@

namespace tesseract {

// Computes and returns the dot product of the two n-vectors u and v.
// Note: because the order of addition is different among the different dot
// product functions, the results can (and do) vary slightly (although they
// agree to within about 4e-15). This produces different results when running
// training, despite all random inputs being precisely equal.
// To get consistent results, use just one of these dot product functions.
// On a test multi-layer network, serial is 57% slower than SSE, and AVX
// is about 8% faster than SSE. This suggests that the time is memory
// bandwidth constrained and could benefit from holding the reused vector
// in AVX registers.
DotProductFunction DotProduct;

static STRING_VAR(dotproduct, "auto",
"Function used for calculation of dot product");

SIMDDetect SIMDDetect::detector;

// If true, then AVX has been detected.
Expand All @@ -44,12 +64,26 @@ bool SIMDDetect::avx512BW_available_;
// If true, then SSE4.1 has been detected.
bool SIMDDetect::sse_available_;

// Portable dot product of the vectors u and v, each holding n elements.
// The products are added one by one in index order, starting from 0.0.
static double DotProductGeneric(const double* u, const double* v, int n) {
  double sum = 0.0;
  for (int remaining = n; remaining > 0; --remaining) {
    sum += *u * *v;
    ++u;
    ++v;
  }
  return sum;
}

// Installs the given function as the implementation which is called
// through the global DotProduct function pointer.
static void SetDotProduct(DotProductFunction function) {
DotProduct = function;
}

// Constructor.
// Tests the architecture in a system-dependent way to detect AVX, SSE and
// any other available SIMD equipment.
// __GNUC__ is also defined by compilers that include GNU extensions such as
// clang.
SIMDDetect::SIMDDetect() {
// The fallback is a generic dot product calculation.
SetDotProduct(DotProductGeneric);

#if defined(X86_BUILD)
# if defined(__GNUC__)
unsigned int eax, ebx, ecx, edx;
Expand Down Expand Up @@ -80,6 +114,57 @@ SIMDDetect::SIMDDetect() {
# error "I don't know how to test for SIMD with this compiler"
# endif
#endif // X86_BUILD

#if defined(X86_BUILD)
// Select code for calculation of dot product based on autodetection.
if (avx_available_) {
// AVX detected.
SetDotProduct(DotProductAVX);
} else if (sse_available_) {
// SSE detected.
SetDotProduct(DotProductSSE);
}
#endif // X86_BUILD
}

void SIMDDetect::Update() {
// Select code for calculation of dot product based on the
// value of the config variable if that value is not empty.
const char* dotproduct_method = "generic";
if (!strcmp(dotproduct.string(), "auto")) {
// Automatic detection. Nothing to be done.
} else if (!strcmp(dotproduct.string(), "generic")) {
// Generic code selected by config variable.
SetDotProduct(DotProductGeneric);
dotproduct_method = "generic";
} else if (!strcmp(dotproduct.string(), "native")) {
// Native optimized code selected by config variable.
SetDotProduct(DotProductNative);
dotproduct_method = "native";
}
#if defined(X86_BUILD)
else if (!strcmp(dotproduct.string(), "avx")) {
// AVX selected by config variable.
SetDotProduct(DotProductAVX);
dotproduct_method = "avx";
} else if (!strcmp(dotproduct.string(), "sse")) {
// SSE selected by config variable.
SetDotProduct(DotProductSSE);
dotproduct_method = "sse";
}
#endif // X86_BUILD
else {
// Unsupported value of config variable.
tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
dotproduct.string());
tprintf("Support values for dotproduct: auto generic native"
#if defined(X86_BUILD)
" avx sse"
#endif // X86_BUILD
".\n");
}

dotproduct.set_value(dotproduct_method);
}

} // namespace tesseract
7 changes: 7 additions & 0 deletions src/arch/simddetect.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@

namespace tesseract {

// Function pointer for best calculation of dot product.
typedef double (*DotProductFunction)(const double* u, const double* v, int n);
extern DotProductFunction DotProduct;

// Architecture detector. Add code here to detect any other architectures for
// SIMD-based faster dot product functions. Intended to be a single static
// object, but it does no real harm to have more than one.
Expand All @@ -41,6 +45,9 @@ class SIMDDetect {
// Returns true if SSE4.1 is available on this system.
static inline bool IsSSEAvailable() { return detector.sse_available_; }

// Update settings after config variable was set.
static void Update();

private:
// Constructor, must set all static member variables.
SIMDDetect();
Expand Down
28 changes: 1 addition & 27 deletions src/lstm/weightmatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// File: weightmatrix.cpp
// Description: Hides distinction between float/int implementations.
// Author: Ray Smith
// Created: Tue Jun 17 11:46:20 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -18,10 +17,8 @@

#include "weightmatrix.h"

#include "dotproductavx.h"
#include "dotproductsse.h"
#include "intsimdmatrix.h"
#include "simddetect.h"
#include "simddetect.h" // for DotProduct
#include "statistc.h"
#include "tprintf.h"

Expand All @@ -38,29 +35,6 @@ const int kAdamCorrectionIterations = 200000;
// Epsilon in Adam to prevent division by zero.
const double kAdamEpsilon = 1e-8;

// Returns the dot product of the vectors u and v, each holding n elements,
// using AVX if it is available, otherwise SSE if it is available, and a
// plain serial loop as the last resort.
//
// Note: the three implementations add the products in a different order, so
// their results can (and do) differ slightly (they agree to within about
// 4e-15). Training therefore produces different results depending on the
// implementation, even with precisely equal random inputs. For consistent
// results, use just one of the implementations.
// On a test multi-layer network, serial is 57% slower than sse, and avx
// is about 8% faster than sse. This suggests that the time is memory
// bandwidth constrained and could benefit from holding the reused vector
// in AVX registers.
static inline double DotProduct(const double* u, const double* v, int n) {
  if (SIMDDetect::IsAVXAvailable()) {
    return DotProductAVX(u, v, n);
  } else if (SIMDDetect::IsSSEAvailable()) {
    return DotProductSSE(u, v, n);
  }

  // Serial fallback, accumulating in index order.
  double sum = 0.0;
  int index = 0;
  while (index < n) {
    sum += u[index] * v[index];
    ++index;
  }
  return sum;
}

// Computes matrix.vector v = Wu.
// u is of size W.dim2() - add_bias_fwd and the output v is of size
// W.dim1() - skip_bias_back.
Expand Down
1 change: 0 additions & 1 deletion src/lstm/weightmatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// File: weightmatrix.h
// Description: Hides distinction between float/int implementations.
// Author: Ray Smith
// Created: Tue Jun 17 09:05:39 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
Expand Down

0 comments on commit f0a4d04

Please sign in to comment.