Skip to content

Commit

Permalink
MDL-70038 assign: add support for pdftoppm tool
Browse files Browse the repository at this point in the history
pdftoppm (from the poppler-utils package) is several orders of
magnitude quicker than ghostscript at extracting PNG images
from PDF documents.

We add support for this tool and use it whenever it is set up,
falling back to gs as before if it is missing.

Revisited code with peer review comments. In particular:

1. Make default path for pdftoppm empty string.
2. Fix a typo on the method description for get_gs_command_for_image.
3. Added more information about why pdftoppm is useful to the pathtopdftoppm_help string.
4. Check that path for pdftoppm is executable to prevent errors. Otherwise, use gs.
  • Loading branch information
jpahullo committed Feb 23, 2021
1 parent a2fda12 commit 0846a76
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 11 deletions.
2 changes: 2 additions & 0 deletions admin/settings/server.php
Expand Up @@ -38,6 +38,8 @@
new lang_string('pathtodot_help', 'admin'), ''));
// Ghostscript executable path setting; its default value is '/usr/bin/gs'.
$temp->add(new admin_setting_configexecutable('pathtogs', new lang_string('pathtogs', 'admin'),
new lang_string('pathtogs_help', 'admin'), '/usr/bin/gs'));
// Pdftoppm executable path setting; the default is an empty string, i.e. no path is preconfigured.
$temp->add(new admin_setting_configexecutable('pathtopdftoppm', new lang_string('pathtopdftoppm', 'admin'),
new lang_string('pathtopdftoppm_help', 'admin'), ''));
// Python executable path setting; the default is an empty string as well.
$temp->add(new admin_setting_configexecutable('pathtopython', new lang_string('pathtopython', 'admin'),
new lang_string('pathtopythondesc', 'admin'), ''));
// Add the populated $temp container to $ADMIN under 'server'.
$ADMIN->add('server', $temp);
Expand Down
2 changes: 2 additions & 0 deletions lang/en/admin.php
Expand Up @@ -949,6 +949,8 @@
// English strings for the "Path to ..." executable settings (presumably the label plus its help/description text).
$string['pathtodu'] = 'Path to du';
$string['pathtogs'] = 'Path to ghostscript';
$string['pathtogs_help'] = 'On most Linux installs, this can be left as \'/usr/bin/gs\'. On Windows it will be something like \'c:\\gs\\bin\\gswin32c.exe\' (make sure there are no spaces in the path - if necessary copy the files \'gswin32c.exe\' and \'gsdll32.dll\' to a new folder without a space in the path)';
// Strings for the pdftoppm path setting: the short name and the longer help text.
$string['pathtopdftoppm'] = 'Path to pdftoppm';
$string['pathtopdftoppm_help'] = '\'pdftoppm\' is a tool that converts PDF pages to PNG at least as fast as \'gs\' does. However, you will probably have a better performance when converting large documents. If present, \'pdftoppm\' will be used instead of \'gs\' for this task. On most Linux installs, this can be left as \'/usr/bin/pdftoppm\'. If not present, install the poppler-utils or poppler package, depending on the Linux distribution. On Windows it will be provided by Cygwin installs. See <a href="https://poppler.freedesktop.org/" target="_blank">Poppler project</a> for more details.';
$string['pathtopgdump'] = 'Path to pg_dump';
$string['pathtopgdumpdesc'] = 'This is only necessary to enter if you have more than one pg_dump on your system (for example if you have more than one version of postgresql installed)';
$string['pathtopgdumpinvalid'] = 'Invalid path to pg_dump - either wrong path or not executable';
Expand Down
67 changes: 56 additions & 11 deletions mod/assign/feedback/editpdf/classes/pdf.php
Expand Up @@ -536,8 +536,6 @@ public function set_image_folder($folder) {
* @return string the filename of the generated image
*/
public function get_image($pageno) {
global $CFG;

if (!$this->filename) {
throw new \coding_exception('Attempting to generate a page image without first setting the PDF filename');
}
Expand All @@ -560,15 +558,7 @@ public function get_image($pageno) {
}

if ($generate) {
// Use ghostscript to generate an image of the specified page.
$gsexec = \escapeshellarg($CFG->pathtogs);
$imageres = \escapeshellarg(100);
$imagefilearg = \escapeshellarg($imagefile);
$filename = \escapeshellarg($this->filename);
$pagenoinc = \escapeshellarg($pageno + 1);
$command = "$gsexec -q -sDEVICE=png16m -dSAFER -dBATCH -dNOPAUSE -r$imageres -dFirstPage=$pagenoinc -dLastPage=$pagenoinc ".
"-dDOINTERPOLATE -dGraphicsAlphaBits=4 -dTextAlphaBits=4 -sOutputFile=$imagefilearg $filename";

$command = $this->get_command_for_image($pageno, $imagefile);
$output = null;
$result = exec($command, $output);
if (!file_exists($imagefile)) {
Expand All @@ -585,6 +575,61 @@ public function get_image($pageno) {
return self::IMAGE_PAGE . $pageno . '.png';
}

/**
* Gets the command that extracts page number $pageno of the PDF document
* as a PNG image into the $imagefile file.
* @param int $pageno Page number to extract from document.
* @param string $imagefile Target filename for the PNG image as absolute path.
* @return string The command to use to extract a page as PNG image.
*/
private function get_command_for_image(int $pageno, string $imagefile): string {
    global $CFG;

    // Pdftoppm can only be used when a path has been configured and it points to an executable file.
    $pdftoppmusable = !empty($CFG->pathtopdftoppm) && is_executable($CFG->pathtopdftoppm);

    if (!$pdftoppmusable) {
        // Default behaviour: build the ghostscript command.
        return $this->get_gs_command_for_image($pageno, $imagefile);
    }

    // Quickest conversion option, so it takes precedence whenever it is usable.
    return $this->get_pdftoppm_command_for_image($pageno, $imagefile);
}

/**
* Gets the pdftoppm command that extracts page number $pageno of the PDF document
* as a PNG image into the $imagefile file.
* @param int $pageno Page number to extract from document.
* @param string $imagefile Target filename for the PNG image as absolute path.
* @return string The pdftoppm command to use to extract a page as PNG image.
*/
private function get_pdftoppm_command_for_image(int $pageno, string $imagefile): string {
    global $CFG;

    // Pdftoppm appends the ".png" extension to the output name itself, so the target must be
    // passed without it. Only a genuine ".png" suffix is removed: cutting the last four
    // characters unconditionally would mangle any target that does not end in ".png"
    // (or that is shorter than four characters).
    $outputroot = $imagefile;
    if (substr($imagefile, -4) === '.png') {
        $outputroot = substr($imagefile, 0, -4);
    }

    // Every value interpolated into the command line is shell-escaped individually.
    $pdftoppmexec = \escapeshellarg($CFG->pathtopdftoppm);
    $imageres = \escapeshellarg(100);
    $imagefilearg = \escapeshellarg($outputroot);
    $filename = \escapeshellarg($this->filename);
    // The tool receives $pageno + 1 as both first (-f) and last (-l) page, so exactly one page
    // is rendered (presumably $pageno is zero-based while pdftoppm counts from 1 - confirm with caller).
    $pagenoinc = \escapeshellarg($pageno + 1);

    return "$pdftoppmexec -q -r $imageres -f $pagenoinc -l $pagenoinc -png -singlefile $filename $imagefilearg";
}

/**
* Gets the ghostscript (gs) command that extracts page number $pageno of the PDF document
* as a PNG image into the $imagefile file.
* @param int $pageno Page number to extract from document.
* @param string $imagefile Target filename for the PNG image as absolute path.
* @return string The ghostscript (gs) command to use to extract a page as PNG image.
*/
private function get_gs_command_for_image(int $pageno, string $imagefile): string {
    global $CFG;

    // Shell-escape every variable part of the command line.
    $executable = \escapeshellarg($CFG->pathtogs);
    $resolution = \escapeshellarg(100);
    $target = \escapeshellarg($imagefile);
    $source = \escapeshellarg($this->filename);
    // The same incremented page number is used as first and last page, so a single page is rendered.
    $page = \escapeshellarg($pageno + 1);

    // Assemble the command as a list of arguments joined by single spaces.
    $arguments = [
        $executable,
        '-q',
        '-sDEVICE=png16m',
        '-dSAFER',
        '-dBATCH',
        '-dNOPAUSE',
        '-r' . $resolution,
        '-dFirstPage=' . $page,
        '-dLastPage=' . $page,
        '-dDOINTERPOLATE',
        '-dGraphicsAlphaBits=4',
        '-dTextAlphaBits=4',
        '-sOutputFile=' . $target,
        $source,
    ];

    return implode(' ', $arguments);
}

/**
* Check to see if PDF is version 1.4 (or below); if not: use ghostscript to convert it
*
Expand Down

0 comments on commit 0846a76

Please sign in to comment.