diff --git a/api/renderer.cpp b/api/renderer.cpp
index 5050a232a1..2aaa36992f 100644
--- a/api/renderer.cpp
+++ b/api/renderer.cpp
@@ -179,6 +179,57 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
   return true;
 }
 
+/**********************************************************************
+ * HOcr TSV Renderer interface implementation
+ **********************************************************************/
+// Forwards outputbase and the literal "hocr.tsv" to the TessResultRenderer
+// constructor and leaves font information output disabled.
+TessHOcrTsvRenderer::TessHOcrTsvRenderer(const char *outputbase)
+    : TessResultRenderer(outputbase, "hocr.tsv"), font_info_(false) {}
+
+// As above, but the caller chooses the value stored in font_info_.
+TessHOcrTsvRenderer::TessHOcrTsvRenderer(const char *outputbase, bool font_info)
+    : TessResultRenderer(outputbase, "hocr.tsv"), font_info_(font_info) {}
+
+// Appends the document preamble: a fixed leading string, the value of
+// title(), and a fixed trailing string. Always returns true.
+// NOTE(review): as written, both fixed strings hold only spaces and
+// newlines -- confirm this is the intended preamble.
+bool TessHOcrTsvRenderer::BeginDocumentHandler() {
+  AppendString(
+      "\n"
+      "\n"
+      "\n \n \n");
+  AppendString(title());
+  AppendString(
+      "\n"
+      "\n"
+      " \n"
+      " \n"
+      "\n\n");
+
+  return true;
+}
+
+// Appends the fixed document trailer. Always returns true.
+bool TessHOcrTsvRenderer::EndDocumentHandler() {
+  AppendString(" \n\n");
+
+  return true;
+}
+
+// Appends the text returned by api->GetHOCRText(imagenum()).
+// Returns false, appending nothing, when that call yields NULL.
+bool TessHOcrTsvRenderer::AddImageHandler(TessBaseAPI* api) {
+  char* hocr = api->GetHOCRText(imagenum());
+  if (hocr == NULL) return false;
+
+  AppendString(hocr);
+  delete[] hocr;  // the returned buffer is released here
+
+  return true;
+}
+
 /**********************************************************************
  * UNLV Text Renderer interface implementation
  **********************************************************************/
diff --git a/api/renderer.h b/api/renderer.h
index 4120f74eb3..0713f78e9a 100644
--- a/api/renderer.h
+++ b/api/renderer.h
@@ -162,6 +162,25 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer {
   bool font_info_;  // whether to print font information
 };
 
+/**
+ * Renders tesseract output into an hOCR TSV string
+ */
+class TESS_API TessHOcrTsvRenderer : public TessResultRenderer {
+ public:
+  // font_info: whether to print font information
+  explicit TessHOcrTsvRenderer(const char *outputbase, bool font_info);
+  // Equivalent to passing font_info = false
+  explicit TessHOcrTsvRenderer(const char *outputbase);
+
+ protected:
+  virtual bool BeginDocumentHandler();
+  virtual bool AddImageHandler(TessBaseAPI* api);
+  virtual bool EndDocumentHandler();
+
+ private:
+  bool font_info_;  // whether to print font information
+};
+
 /**
  * Renders tesseract output into searchable PDF
  */