Skip to content

Commit

Permalink
add getPageRegions api in libk2pdfopt
Browse files Browse the repository at this point in the history
  • Loading branch information
chrox committed Jan 2, 2014
1 parent 187bbb9 commit bf3ef2a
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 1 deletion.
15 changes: 15 additions & 0 deletions ffi/koptcontext.lua
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,19 @@ function KOPTContext_mt.__index:getTOCRWord(x, y, w, h, datadir, lang, ocr_type,
return ffi.string(word)
end

--- Partition the page into regions and return them in normalized form.
-- Calls k2pdfopt_part_bmp on this context, then divides each resulting
-- bmpregion's column bounds by page_width and its row bounds by page_height.
-- @treturn table array of { x0, x1, y0, y1 } tables, one per page region
function KOPTContext_mt.__index:getPageRegions()
    k2pdfopt.k2pdfopt_part_bmp(self)
    local page_w = self.page_width
    local page_h = self.page_height
    local count = self.pageregions.n
    local result = {}
    for slot = 1, count do
        -- C array is 0-based, the Lua result array is 1-based
        local box = self.pageregions.pageregion[slot - 1].bmpregion
        result[slot] = {
            x0 = box.c1 / page_w,
            x1 = box.c2 / page_w,
            y0 = box.r1 / page_h,
            y1 = box.r2 / page_h,
        }
    end
    return result
end

function KOPTContext_mt.__index:free()
--[[ Don't worry about the src bitmap in context. It's freed as soon as it's
been used in either reflow or autocrop. But we should take care of dst
Expand All @@ -206,6 +219,7 @@ function KOPTContext_mt.__index:free()
leptonica.boxaDestroy(nboxa)
k2pdfopt.bmp_free(self.dst)
k2pdfopt.wrectmaps_free(self.rectmaps)
k2pdfopt.pageregions_free(self.pageregions)
end

--- Finalizer-style hook: releases the context by delegating to free().
-- NOTE(review): this is attached to the __index table rather than to the
-- metatable itself; confirm it is actually picked up as a finalizer.
function KOPTContext_mt.__index:__gc()
    self:free()
end
Expand Down Expand Up @@ -258,6 +272,7 @@ function KOPTContext.new()
k2pdfopt.bmp_init(kc.src)
k2pdfopt.bmp_init(kc.dst)
k2pdfopt.wrectmaps_init(kc.rectmaps)
k2pdfopt.pageregions_init(kc.pageregions)

return kc
end
Expand Down
62 changes: 62 additions & 0 deletions ffi/koptcontext_h.lua
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,64 @@ typedef struct {
int type; /* See defines above for WILLUSBITMAP_TYPE_... */
} WILLUSBITMAP;

/* Position of a hyphen; embedded in TEXTROW as its `hyphen` member. */
typedef struct {
int ch; /* Hyphen starting point -- < 0 for no hyphen */
int c2; /* End of end region if hyphen is erased */
int r1; /* Top of hyphen */
int r2; /* Bottom of hyphen */
} HYPHENINFO;

/* One row region: column/row bounds plus baseline, gap and height metrics.
** NOTE(review): capheight, h5050 and lcheight carry no description here;
** presumably glyph-height measurements -- confirm against k2pdfopt.
*/
typedef struct {
int c1,c2; /* Left and right columns */
int r1,r2; /* Top and bottom of region in pixels */
int rowbase; /* Baseline of row */
int gap; /* Gap between next region and this region's rowbase. */
int gapblank; /* Actual blank area between next region and this region. */
int rowheight; /* text + gap (delta between rowbases) */
int capheight;
int h5050;
int lcheight;
int type; /* See region type #defines above */
double rat; /* If found with find_doubles, this is > 0 (the figure of merit) */
HYPHENINFO hyphen;
} TEXTROW;

/* Collection of TEXTROW entries reached through a pointer.
** NOTE(review): n is presumably the number of entries in use and na the
** allocated capacity -- confirm against the k2pdfopt sources.
*/
typedef struct {
TEXTROW *textrow;
int n,na;
} TEXTROWS;

/* A rectangular region of a bitmap: inclusive row/column bounds, its text
** rows and bounding box, source-page attributes, and pointers to the
** bitmaps it refers to.
*/
typedef struct {
int r1,r2; /* row position from top of bmp, inclusive */
int c1,c2; /* column positions, inclusive */
TEXTROWS textrows; /* If nrows>0, top and bottom (if nrows>11) text row of region */
TEXTROW bbox; /* Bounding box of region. type==REGION_TYPE_UNDETERMINED if not calced yet */
WRECTMAPS *wrectmaps; /* If region consists of multiple, separated source rectangles
** (like from wrapbmp structure), then this is non-null and maps
** the bitmap region to the source page.
*/
int bgcolor; /* Background color of region, 0 - 255 */
int dpi; /* dpi of bitmap */
int pageno; /* Source page number, -1 if unknown */
int rotdeg; /* Source rotation, degrees, counterclockwise */
int *colcount; /* Always check for NULL before using */
int *rowcount; /* Always check for NULL before using */
WILLUSBITMAP *bmp;
WILLUSBITMAP *bmp8;
WILLUSBITMAP *marked;
} BMPREGION;

/* One page region: a BMPREGION plus two integer attributes.
** NOTE(review): the meaning of fullspan and level is not visible here --
** confirm against the k2pdfopt sources.
*/
typedef struct {
BMPREGION bmpregion;
int fullspan;
int level;
} PAGEREGION;

/* List of PAGEREGION entries; held in KOPTContext as `pageregions`.
** n is the number of entries (getPageRegions walks indices 0 .. n-1).
** NOTE(review): na is presumably the allocated capacity -- confirm.
*/
typedef struct {
PAGEREGION *pageregion;
int n,na;
} PAGEREGIONS;

typedef struct KOPTContext {
int trim;
int wrap;
Expand Down Expand Up @@ -115,6 +173,7 @@ typedef struct KOPTContext {
BOXA *nboxa; // word boxes in native page
NUMA *nnai; // word boxes indices in native page
WRECTMAPS rectmaps; // rect maps between reflowed and native pages
PAGEREGIONS pageregions; // sorted region list by display order
BBox bbox;
char *language;
WILLUSBITMAP dst;
Expand Down Expand Up @@ -156,4 +215,7 @@ void k2pdfopt_tocr_single_word(WILLUSBITMAP *src,
int bmpmupdf_pdffile_to_bmp(WILLUSBITMAP *bmp,char *filename,int pageno,double dpi,int bpp);
void k2pdfopt_reflow_bmp(KOPTContext *kctx);
void k2pdfopt_tocr_end();
void pageregions_init(PAGEREGIONS *regions);
void pageregions_free(PAGEREGIONS *regions);
void k2pdfopt_part_bmp(KOPTContext *kctx);
]]
Binary file added spec/unit/data/Paper.pdf
Binary file not shown.
11 changes: 11 additions & 0 deletions spec/unit/koptcontext_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ local KOPTContext = require("ffi/koptcontext")
local k2pdfopt = ffi.load("libs/libk2pdfopt.so.2")

local sample_pdf = "spec/unit/data/Alice.pdf"
local paper_pdf = "spec/unit/data/Paper.pdf"

describe("KOPTContext module", function()
it("should be created", function()
Expand Down Expand Up @@ -131,4 +132,14 @@ describe("KOPTContext module", function()
kc:free()
assert(kc.dst.size_allocated == 0)
end)
it("should get list of page regions", function()
    local kc = KOPTContext.new()
    local pdf_path = ffi.cast("char*", paper_pdf)
    -- arguments after the path: page number 1, 300 dpi, 8 bpp
    k2pdfopt.bmpmupdf_pdffile_to_bmp(kc.dst, pdf_path, 1, 300, 8)
    kc.page_width = kc.dst.width
    kc.page_height = kc.dst.height
    -- each region's normalized width and height must not exceed 1
    for _, region in ipairs(kc:getPageRegions()) do
        assert(region.x1 - region.x0 <= 1)
        assert(region.y1 - region.y0 <= 1)
    end
end)
end)

0 comments on commit bf3ef2a

Please sign in to comment.