Skip to content

Commit

Permalink
Implement a new orientation and script detection API for C and C++
Browse files Browse the repository at this point in the history
See issue #424.

The existing C API for TessBaseAPIDetectOS requires a C caller to successfully allocate a struct OSResults, which is actually a C++ class. Generally, it won't be possible for a regular C compiler to do this properly.

It's also assumed that most API level users of Tesseract are only interested in Tesseract's best guess as to script and orientation, not the individual values for all possible scripts.

This introduces a new API with a better name that is more closely aligned with the output of 'tesseract -psm 0'.  Both tesseract -psm 0 and this API now share the same code in baseapi.cpp.
  • Loading branch information
James R. Barlow committed Dec 7, 2016
1 parent 929d25c commit bc95798
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 13 deletions.
37 changes: 25 additions & 12 deletions api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1891,31 +1891,44 @@ char* TessBaseAPI::GetUNLVText() {
return result;
}

/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* TessBaseAPI::GetOsdText(int page_number) {
bool TessBaseAPI::DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf) {
OSResults osr;

bool osd = DetectOS(&osr);
if (!osd) {
return NULL;
return false;
}

int orient_id = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient_id);
float orient_conf = osr.best_result.oconfidence;
float script_conf = osr.best_result.sconfidence;
orient_conf = osr.best_result.oconfidence;
script_conf = osr.best_result.sconfidence;
const char* script_name =
osr.unicharset->get_script_from_script_id(script_id);

// clockwise orientation of the input image, in degrees
int orient_deg = orient_id * 90;
orient_deg = orient_id * 90;

script = script_name;
return true;
}

/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* TessBaseAPI::GetOsdText(int page_number) {
int orient_deg;
float orient_conf;
std::string script_name;
float script_conf;

if (!DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf))
return NULL;

// clockwise rotation needed to make the page upright
int rotate = OrientationIdToValue(orient_id);
int rotate = OrientationIdToValue(orient_deg / 90);

const int kOsdBufsize = 255;
char* osd_buf = new char[kOsdBufsize];
Expand All @@ -1926,7 +1939,7 @@ char* TessBaseAPI::GetOsdText(int page_number) {
"Orientation confidence: %.2f\n"
"Script: %s\n"
"Script confidence: %.2f\n",
page_number, orient_deg, rotate, orient_conf, script_name,
page_number, orient_deg, rotate, orient_conf, script_name.c_str(),
script_conf);

return osd_buf;
Expand Down
10 changes: 10 additions & 0 deletions api/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
(patch))

#include <stdio.h>
#include <string>
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
Expand Down Expand Up @@ -618,6 +619,15 @@ class TESS_API TessBaseAPI {
*/
char* GetUNLVText();

/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image, in
* degrees (always a multiple of 90)
* orient_conf is the confidence (15.0 is reasonable)
* script is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true when detection succeeds. Returns false, leaving all four
* output arguments unmodified, when the underlying DetectOS call fails.
*/
bool DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf);

/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
Expand Down
31 changes: 30 additions & 1 deletion api/capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,9 +538,38 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand

TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results)
{
return handle->DetectOS(results) ? TRUE : FALSE;
return FALSE; // Unsafe ABI, return FALSE always
}

/**
 * C-callable wrapper around TessBaseAPI::DetectOrientationScript.
 *
 * Every output pointer is optional: a null pointer means the caller is not
 * interested in that value. Nothing is written when detection fails.
 *
 * On success *best_script_name receives a copy of the script name allocated
 * with new char[]; the caller owns that buffer (capi.h tells callers to
 * release it with TessDeleteText).
 *
 * Returns TRUE when detection succeeded, FALSE otherwise.
 */
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name,
                                                           int* best_orientation_deg, float* script_confidence,
                                                           float* orientation_confidence)
{
    int detected_deg;
    float detected_orient_conf;
    std::string detected_script;
    float detected_script_conf;

    // Bail out before touching any caller-provided output.
    if (!handle->DetectOrientationScript(detected_deg, detected_orient_conf,
                                         detected_script, detected_script_conf)) {
        return FALSE;
    }

    // Allocate first: if the allocation throws, no output has been written yet.
    if (best_script_name) {
        char* name_copy = new char [detected_script.length() + 1];
        strcpy(name_copy, detected_script.c_str());
        *best_script_name = name_copy;
    }

    if (best_orientation_deg) {
        *best_orientation_deg = detected_deg;
    }
    if (script_confidence) {
        *script_confidence = detected_script_conf;
    }
    if (orientation_confidence) {
        *orientation_confidence = detected_orient_conf;
    }

    return TRUE;
}


TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex)
{
Expand Down
7 changes: 7 additions & 0 deletions api/capi.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,15 @@ TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle);
TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f);

TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f);

// Deprecated, no longer working: the implementation always returns FALSE and
// never writes to results. Use TessBaseAPIDetectOrientationScript instead.
TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results);

// Detects the orientation and script of the input image by calling
// TessBaseAPI::DetectOrientationScript on handle.
// Returns TRUE on success. On failure returns FALSE and writes none of the outputs.
// Each output pointer may be NULL, in which case that value is simply not reported.
// best_orientation_deg receives the orientation in degrees; script_confidence and
// orientation_confidence receive the corresponding confidence values.
// Call TessDeleteText(*best_script_name) to free memory allocated by this function
TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name,
int* best_orientation_deg, float* script_confidence,
float* orientation_confidence);

TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features,
int* num_features, int* FeatureOutlineIndex);

Expand Down

0 comments on commit bc95798

Please sign in to comment.