Skip to content

Commit

Permalink
Refactor functions to allow reuse in croppdf and cleanpdf
Browse files Browse the repository at this point in the history
* Put helper functions for adaptive binarization and contrast
  enhancement in adaptmap.c.  They were in pdfapp.c, a location that
  is too high-level for these image processing operations.
* Get reuse because both these programs emit pdfs with 1 bpp images.
* These functions and programs no longer take a threshold variable
  because background normalization is used for robustness.
* Also show program progression by page in cleanpdf.
  • Loading branch information
DanBloomberg committed Aug 30, 2023
1 parent d803c99 commit df0fe14
Show file tree
Hide file tree
Showing 8 changed files with 224 additions and 178 deletions.
30 changes: 13 additions & 17 deletions prog/cleanpdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* and concatenate them into a single pdf file of images.
*
* Syntax:
* cleanpdf basedir resolution darken rotation opensize title fileout
* cleanpdf basedir resolution contrast rotation opensize title fileout
*
* A typical command is:
* cleanpdf . 300 0 0 0 none <name-of-output-pdf-file>
Expand All @@ -59,10 +59,10 @@
* At 300 ppi, an 8.5 x 11 page would have 2550 x 3300 pixels.
* You can also input 0 for the default output resolution of 300 ppi.
*
* The %darken parameter adjusts the binarization to avoid losing input
* details that are too light. It takes on 10 values from 0 to 9, where
* 0 is the lightest and is the default. The contrast is increased
* as %darken increases.
* The %contrast parameter adjusts the binarization to avoid losing input
* details that are too light. It takes on 10 values from 1 to 10, where
* 1 is the lightest value and it removes noise. Suggested value is 1
* unless important details are lost on binarization.
*
* The %rotation parameter is an integer that specifies the rotation
* to be applied to each image:
Expand Down Expand Up @@ -145,28 +145,23 @@
#include <sys/types.h>
#include "allheaders.h"

#if 0
/* Special version */
PIX *pixConvertTo8Special(PIX *pix);
#endif

l_int32 main(int argc,
char **argv)
{
char buf[256];
char *basedir, *fname, *tail, *basename, *imagedir, *firstfile, *title;
char *fileout;
l_int32 i, n, res, darken, rotation, opensize, render_res, ret;
l_int32 i, n, res, contrast, rotation, opensize, render_res, ret;
SARRAY *sa;

if (argc != 8)
return ERROR_INT(
"\n Syntax: cleanpdf basedir resolution "
"darken rotation opensize title fileout",
"contrast rotation opensize title fileout",
__func__, 1);
basedir = argv[1];
res = atoi(argv[2]);
darken = atoi(argv[3]);
contrast = atoi(argv[3]);
rotation = atoi(argv[4]);
opensize = atoi(argv[5]);
title = argv[6];
Expand All @@ -178,9 +173,9 @@ SARRAY *sa;
__func__, res);
return 1;
}
if (darken < 0 || darken > 9) {
L_ERROR("invalid darken = %d; darken must be in {0,...,9}\n",
__func__, darken);
if (contrast < 1 || contrast > 10) {
L_ERROR("invalid contrast = %d; contrast must be in {1,...,10}\n",
__func__, contrast);
return 1;
}
if (rotation < 0 || rotation > 3) {
Expand Down Expand Up @@ -266,6 +261,7 @@ SARRAY *sa;
lept_free(imagedir);
sarrayWriteStderr(sa);
lept_stderr("cleaning ...\n");
cleanTo1bppFilesToPdf(sa, res, darken, rotation, opensize, title, fileout);
cleanTo1bppFilesToPdf(sa, res, contrast, rotation, opensize,
title, fileout);
return 0;
}
30 changes: 14 additions & 16 deletions prog/croppdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
* is encoded with tiffg4.
*
* Syntax:
* croppdf basedir threshold lrclear tbclear edgeclean
* lradd tbadd title fileout
* croppdf basedir lrclear tbclear edgeclean lradd tbadd title fileout
*
* The %basedir is a directory where the input pdf files are located.
* The program will operate on every file in this directory with
Expand Down Expand Up @@ -90,23 +89,22 @@ l_int32 main(int argc,
{
char buf[256];
char *basedir, *fname, *tail, *basename, *imagedir, *title, *fileout;
l_int32 threshold, lrclear, tbclear, edgeclean, lradd, tbadd;
l_int32 lrclear, tbclear, edgeclean, lradd, tbadd;
l_int32 render_res, i, n, ret;
SARRAY *sa;

if (argc != 10)
if (argc != 9)
return ERROR_INT(
"Syntax: croppdf basedir threshold lrclear tbclear edgeclean "
"Syntax: croppdf basedir lrclear tbclear edgeclean "
"lradd tbadd title fileout", __func__, 1);
basedir = argv[1];
threshold = atoi(argv[2]);
lrclear = atoi(argv[3]);
tbclear = atoi(argv[4]);
edgeclean = atoi(argv[5]);
lradd = atoi(argv[6]);
tbadd = atoi(argv[7]);
title = argv[8];
fileout = argv[9];
lrclear = atoi(argv[2]);
tbclear = atoi(argv[3]);
edgeclean = atoi(argv[4]);
lradd = atoi(argv[5]);
tbadd = atoi(argv[6]);
title = argv[7];
fileout = argv[8];
setLeptDebugOK(1);

/* Set up a directory for temp images */
Expand Down Expand Up @@ -157,9 +155,9 @@ SARRAY *sa;
sa = getSortedPathnamesInDirectory(imagedir, NULL, 0, 0);
lept_free(imagedir);
sarrayWriteStderr(sa);
lept_stderr("croping ...\n");
cropFilesToPdf(sa, threshold, lrclear, tbclear, edgeclean,
lradd, tbadd, title, fileout);
lept_stderr("cropping ...\n");
cropFilesToPdf(sa, lrclear, tbclear, edgeclean, lradd, tbadd,
title, fileout);

return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion prog/misctest1.c
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ PIXCMAP *cmap, *cmapg;

/* Page cropping */
pix1 = pixRead("tel_3.tif");
pix2 = pixCropImage(pix1, 160, 30, 30, 4, 25, 25,
pix2 = pixCropImage(pix1, 30, 30, 4, 25, 25,
"/tmp/lept/misc/cropdebug.pdf", NULL);
pixDestroy(&pix1);
pixDestroy(&pix2);
Expand Down
163 changes: 163 additions & 0 deletions src/adaptmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@
* static PIX *pixLinearTRCTiled()
* static l_int32 *iaaGetLinearTRC()
*
* Adaptive normalization with MinMax conversion of RGB to gray,
* contrast enhancement and optional 2x upscale binarization
* PIX *pixBackgroundNormTo1MinMax()
* PIX *pixConvertTo8MinMax()
* static l_ok pixSelectiveContrastMod()
*
* Background normalization is done by generating a reduced map (or set
* of maps) representing the estimated background value of the
* input image, and using this to shift the pixel values so that
Expand Down Expand Up @@ -161,6 +167,8 @@ static PIX *pixLinearTRCTiled(PIX *pixd, PIX *pixs, l_int32 sx, l_int32 sy,
PIX *pixmin, PIX *pixmax);
static l_int32 *iaaGetLinearTRC(l_int32 **iaa, l_int32 diff);

static l_ok pixSelectiveContrastMod(PIX *pixs, l_int32 contrast);

#ifndef NO_CONSOLE_IO
#define DEBUG_GLOBAL 0 /*!< set to 1 to debug pixGlobalNormNoSatRGB() */
#endif /* ~NO_CONSOLE_IO */
Expand Down Expand Up @@ -2898,3 +2906,158 @@ l_float32 factor;

return ia;
}


/*------------------------------------------------------------------*
* Adaptive normalization with MinMax conversion of RGB to gray, *
* contrast enhancement and optional 2x upscale binarization *
*------------------------------------------------------------------*/
/*!
 * \brief   pixBackgroundNormTo1MinMax()
 *
 * \param[in]    pixs          any depth, with or without colormap
 * \param[in]    contrast      1 to 10: 1 reduces contrast; 10 is maximum
 *                             enhancement
 * \param[in]    scalefactor   1 (no change); 2 (2x upscale)
 * \return  1 bpp pix if OK; NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is a convenience binarization function that does four things:
 *          * Generates a grayscale image with color enhancement to gray
 *          * Background normalization
 *          * Contrast modification, selected by %contrast
 *          * Binarizes either at input resolution or with 2x upscaling
 *      (2) If the %pixs is 1 bpp, returns a copy.  In that case neither
 *          %contrast nor %scalefactor has any effect on the result.
 *      (3) The contrast increasing parameter %contrast takes values {1, ... 10}.
 *          For decent scans, contrast = 1 is recommended.  Use a larger
 *          value if important details are lost in binarization.
 *      (4) Valid values of %scalefactor are 1 and 2.
 *      (5) Each intermediate image is checked, so that a failure in the
 *          grayscale conversion or the background normalization is
 *          reported once, here, rather than by every later step.
 * </pre>
 */
PIX *
pixBackgroundNormTo1MinMax(PIX     *pixs,
                           l_int32  contrast,
                           l_int32  scalefactor)
{
PIX  *pix1, *pix2, *pixd;

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
    if (contrast < 1 || contrast > 10)
        return (PIX *)ERROR_PTR("contrast not in [1 ... 10]", __func__, NULL);
    if (scalefactor != 1 && scalefactor != 2)
        return (PIX *)ERROR_PTR("scalefactor not 1 or 2", __func__, NULL);

        /* Already binary: nothing to normalize or threshold */
    if (pixGetDepth(pixs) == 1)
        return pixCopy(NULL, pixs);

        /* Gray conversion */
    if ((pix1 = pixConvertTo8MinMax(pixs)) == NULL)
        return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);

        /* Background normalization */
    pix2 = pixBackgroundNormSimple(pix1, NULL, NULL);
    pixDestroy(&pix1);
    if (!pix2)
        return (PIX *)ERROR_PTR("pix2 not made", __func__, NULL);

        /* In-place contrast modification, then threshold at 180 */
    pixSelectiveContrastMod(pix2, contrast);
    if (scalefactor == 1)
        pixd = pixThresholdToBinary(pix2, 180);
    else  /* scalefactor == 2 */
        pixd = pixScaleGray2xLIThresh(pix2, 180);
    pixDestroy(&pix2);
    return pixd;
}


/*!
 * \brief   pixConvertTo8MinMax()
 *
 * \param[in]    pixs     any depth, with or without colormap
 * \return  8 bpp pix if OK; NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is a special version of pixConvertTo8() that removes any
 *          existing colormap and uses pixConvertRGBToGrayMinMax()
 *          to strongly render color into black.
 *      (2) The conversion is chosen by the depth of %pixs:
 *          * 1, 2, 4 and 16 bpp use the corresponding pixConvert*To8()
 *          * 8 bpp with a colormap has the colormap removed to grayscale;
 *            8 bpp without a colormap is copied
 *          * 32 bpp uses pixConvertRGBToGrayMinMax() with L_CHOOSE_MIN
 *          Any other depth is an error.
 *      (3) A new pix is always returned; %pixs is not modified.
 * </pre>
 */
PIX *
pixConvertTo8MinMax(PIX  *pixs)
{
l_int32  depth;

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);

    depth = pixGetDepth(pixs);
    switch (depth)
    {
    case 1:
        return pixConvert1To8(NULL, pixs, 255, 0);
    case 2:
        return pixConvert2To8(pixs, 0, 85, 170, 255, FALSE);
    case 4:
        return pixConvert4To8(pixs, FALSE);
    case 8:
        if (pixGetColormap(pixs) == NULL)
            return pixCopy(NULL, pixs);
        return pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
    case 16:
        return pixConvert16To8(pixs, L_MS_BYTE);
    case 32:
        return pixConvertRGBToGrayMinMax(pixs, L_CHOOSE_MIN);
    default:
        break;
    }

    L_ERROR("Invalid depth d = %d\n", __func__, depth);
    return NULL;
}


/*!
 * \brief   pixSelectiveContrastMod()
 *
 * \param[in]    pixs       8 bpp without colormap
 * \param[in]    contrast   1 (default value) for some contrast reduction;
 *                          10 for maximum contrast enhancement.
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) This does in-place contrast enhancement on 8 bpp grayscale that
 *          has been background normalized to 200.  Therefore, there should
 *          be no gray pixels above 200 in %pixs.  For general contrast
 *          enhancement on gray or color images, see pixContrastTRC().
 *      (2) %contrast must be in [1 ... 10].  A value outside that range
 *          is an error and leaves %pixs unchanged.
 *      (3) Use %contrast = 1 for minimum contrast enhancement (which will
 *          remove some speckle noise) and %contrast = 10 for maximum
 *          darkening.
 *      (4) We use 200 for the white point in all transforms.  Using a
 *          white point above 200 will darken all grayscale pixels.
 *      (5) Each contrast level selects a (gamma, minval) pair for
 *          pixGammaTRC().  As %contrast goes from 1 to 10, gamma falls
 *          from 2.0 to 0.5 and minval rises from 50 to 140 in steps of 10.
 * </pre>
 */
static l_ok
pixSelectiveContrastMod(PIX     *pixs,
                        l_int32  contrast)
{
    /* Gamma and black point for each level; the index is contrast - 1 */
static const struct {
    l_float32  gamma;
    l_int32    minval;
} trc[10] = {
    { 2.0,   50 },
    { 1.8,   60 },
    { 1.6,   70 },
    { 1.4,   80 },
    { 1.2,   90 },
    { 1.0,  100 },
    { 0.85, 110 },
    { 0.7,  120 },
    { 0.6,  130 },
    { 0.5,  140 }
};
const l_int32  whitepoint = 200;  /* matches the background norm target */

    if (!pixs || pixGetDepth(pixs) != 8)
        return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1);
    if (contrast < 1 || contrast > 10)
        return ERROR_INT("contrast not in [1 ... 10]", __func__, 1);

        /* In-place: the dest and src are the same pix */
    pixGammaTRC(pixs, pixs, trc[contrast - 1].gamma,
                trc[contrast - 1].minval, whitepoint);
    return 0;
}

8 changes: 5 additions & 3 deletions src/allheaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ LEPT_DLL extern PIX * pixGlobalNormNoSatRGB ( PIX *pixd, PIX *pixs, l_int32 rval
LEPT_DLL extern l_ok pixThresholdSpreadNorm ( PIX *pixs, l_int32 filtertype, l_int32 edgethresh, l_int32 smoothx, l_int32 smoothy, l_float32 gamma, l_int32 minval, l_int32 maxval, l_int32 targetthresh, PIX **ppixth, PIX **ppixb, PIX **ppixd );
LEPT_DLL extern PIX * pixBackgroundNormFlex ( PIX *pixs, l_int32 sx, l_int32 sy, l_int32 smoothx, l_int32 smoothy, l_int32 delta );
LEPT_DLL extern PIX * pixContrastNorm ( PIX *pixd, PIX *pixs, l_int32 sx, l_int32 sy, l_int32 mindiff, l_int32 smoothx, l_int32 smoothy );
LEPT_DLL extern PIX * pixBackgroundNormTo1MinMax ( PIX *pixs, l_int32 contrast, l_int32 scalefactor );
LEPT_DLL extern PIX * pixConvertTo8MinMax ( PIX *pixs );
LEPT_DLL extern PIX * pixAffineSampledPta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor );
LEPT_DLL extern PIX * pixAffineSampled ( PIX *pixs, l_float32 *vc, l_int32 incolor );
LEPT_DLL extern PIX * pixAffinePta ( PIX *pixs, PTA *ptad, PTA *ptas, l_int32 incolor );
Expand Down Expand Up @@ -1373,7 +1375,7 @@ LEPT_DLL extern PIX * pixGenHalftoneMask ( PIX *pixs, PIX **ppixtext, l_int32 *p
LEPT_DLL extern PIX * pixGenerateHalftoneMask ( PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb );
LEPT_DLL extern PIX * pixGenTextlineMask ( PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb );
LEPT_DLL extern PIX * pixGenTextblockMask ( PIX *pixs, PIX *pixvws, PIXA *pixadb );
LEPT_DLL extern PIX * pixCropImage ( PIX *pixs, l_int32 threshold, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, const char *debugfile, BOX **pcropbox );
LEPT_DLL extern PIX * pixCropImage ( PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, const char *debugfile, BOX **pcropbox );
LEPT_DLL extern BOX * pixFindPageForeground ( PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac );
LEPT_DLL extern l_ok pixSplitIntoCharacters ( PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug );
LEPT_DLL extern BOXA * pixSplitComponentWithProfile ( PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug );
Expand Down Expand Up @@ -1402,8 +1404,8 @@ LEPT_DLL extern l_ok partifyPixac ( PIXAC *pixac, l_int32 nparts, const char *ou
LEPT_DLL extern BOXA * boxaGetWhiteblocks ( BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops );
LEPT_DLL extern BOXA * boxaPruneSortedOnOverlap ( BOXA *boxas, l_float32 maxoverlap );
LEPT_DLL extern l_ok compressFilesToPdf ( SARRAY *sa, l_int32 onebit, l_int32 savecolor, l_float32 scalefactor, l_int32 quality, const char *title, const char *fileout );
LEPT_DLL extern l_ok cropFilesToPdf ( SARRAY *sa, l_int32 threshold, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, const char *title, const char *fileout );
LEPT_DLL extern l_ok cleanTo1bppFilesToPdf ( SARRAY *sa, l_int32 res, l_int32 darken, l_int32 rotation, l_int32 opensize, const char *title, const char *fileout );
LEPT_DLL extern l_ok cropFilesToPdf ( SARRAY *sa, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, const char *title, const char *fileout );
LEPT_DLL extern l_ok cleanTo1bppFilesToPdf ( SARRAY *sa, l_int32 res, l_int32 contrast, l_int32 rotation, l_int32 opensize, const char *title, const char *fileout );
LEPT_DLL extern l_ok convertFilesToPdf ( const char *dirname, const char *substr, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout );
LEPT_DLL extern l_ok saConvertFilesToPdf ( SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout );
LEPT_DLL extern l_ok saConvertFilesToPdfData ( SARRAY *sa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, l_uint8 **pdata, size_t *pnbytes );
Expand Down
Loading

0 comments on commit df0fe14

Please sign in to comment.