From 1c8f998133c7f8836c48dad955ecdcca3074e2a1 Mon Sep 17 00:00:00 2001 From: Thorsten Vitt Date: Fri, 4 Sep 2020 17:03:47 +0200 Subject: [PATCH] Removed pdf merge experiments --- README.md | 29 +++------------ .../java/net/faustedition/gen/PdfMerger.java | 35 ------------------- .../faustedition/gen/PdfMergerBoxBased.java | 32 ----------------- 3 files changed, 5 insertions(+), 91 deletions(-) delete mode 100644 src/main/java/net/faustedition/gen/PdfMerger.java delete mode 100644 src/main/java/net/faustedition/gen/PdfMergerBoxBased.java diff --git a/README.md b/README.md index a00155e..2016367 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ There is a preliminary eXist app that implements the search functionality. Deplo ## Advanced usage +!! Parts of this section are slightly outdated … + The build uses _profiles_ to select the parts that should run. The profile `svg` (`mvn -Psvg package`) runs the SVG generation, the profile `xproc` the XProc stuff. Everything is on by default, so just running `mvn clean package` will generate the whole site (except images) in `target/www` ## Components @@ -31,33 +33,11 @@ The diplomatic transcripts are rendered page by page using JavaScript in a simul The code that does the actual rendering can be found in . This folder contains a simple web page, with font resources etc. pulled in from faust-web, plus the rendering code mainly developed by Moritz Wissenbach in . -To create both the diplomatic transcript and the overlay transcript for a single page, is called using [PhantomJS](http://phantomjs.org/) or [SlimerJS](http://slimerjs.org), which will load in its simulated browser, trigger the rendering scripts there, and then extract and store the rendered SVGs. +To create both the diplomatic transcript and the overlay transcript for a single page, is called using node.js. This uses [Puppeteer](https://pptr.dev/) to remote-control a headless Chromium browser in which each page will be rendered as SVG. 
The SVGs will then be extracted and stored so they can later be included in the edition’s UI. The JS does not directly work with the XML transcripts. Instead, each page needs to be transformed to a JSON representation, which is done using code from https://github.com/faustedition/faust-app, which is pulled in as a Maven dependency. The Java program at is used to run the actual pipeline, i.e. iterate through the manuscripts and their pages, convert stuff to JSON, and run on each of these JSON files. Intermediate results (i.e. JSON files) and, if enabled, debugging data (e.g., PDFs) are written to the target directory. -The process might well take 1.5h, it is bound to the `svg` profile. - -#### PhantomJS or SlimerJS? - -The process can run using either PhantomJS (based on QtWebKit) or SlimerJS (using Firefox). Which of the libraries is used needs to be configured in the `pom.xml`, there are comments illustrating the configuration. - -While PhantomJS bundles its own browser version, SlimerJS needs a firefox binary instead. The pom will try to get a specific Firefox version using maven dependencies – if it fails to do so, you can either deploy the specific Firefox version to a maven repo (or install it to your local repo), or uncomment the SLIMERJSthe specific Firefox version to a maven repo (or install it to your local repo), or comment the dependency and the SLIMERJSLAUNCHER environment variable setting in the pom to use your system's installed firefox. - -Here's what I used to deploy firefox: - -```bash -mvn deploy:deploy-file \ - -Durl=http://dev.digital-humanities.de/nexus/content/repositories/thirdparty/ -DrepositoryId=thirdparty \ - -DgroupId=org.mozilla -DartifactId=firefox -Dclassifier=linux-x86_64 \ - -Dpackaging=tar.bz2 -Dversion=45.4.0esr -Dfile=firefox-45.4.0esr.tar.bz2 -``` - -SlimerJS isn't really headless, it opens and closes two windows per converted page. 
To avoid this, use, e.g., - -```bash -xvfb-run mvn -``` - +The process might well take 1.5h; it is bound to the `svg` profile. All components – nodejs, puppeteer, as well as Chromium – will be downloaded on first run. ### Textual transcripts, metadata, and overview data @@ -89,3 +69,4 @@ There are two steps that involve pulling in data from the internal wiki: * filling the eXist instance, see the scripts in faust-gen-html * preparing the facsimiles, see convert.sh +* running the macrogenesis part diff --git a/src/main/java/net/faustedition/gen/PdfMerger.java b/src/main/java/net/faustedition/gen/PdfMerger.java deleted file mode 100644 index c79617f..0000000 --- a/src/main/java/net/faustedition/gen/PdfMerger.java +++ /dev/null @@ -1,35 +0,0 @@ -package net.faustedition.gen; - -import com.itextpdf.kernel.pdf.PdfDocument; -import com.itextpdf.kernel.pdf.PdfReader; -import com.itextpdf.kernel.pdf.PdfWriter; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; - -public class PdfMerger { - - public static void main(String[] argv) { - final PdfWriter writer; - try { - writer = new PdfWriter("/tmp/out.pdf"); - writer.setSmartMode(true); - final PdfDocument pdfDocument = new PdfDocument(writer); - pdfDocument.initializeOutlines(); - Path root = Paths.get("/home/tv/git/faust-gen/target/www/transcript/diplomatic/2_H"); - - for (int i = 1; i < 390; i++) { - PdfDocument addedDoc = new PdfDocument(new PdfReader(root.resolve(String.format("page_%d.pdf", i)).toString())); - addedDoc.copyPagesTo(1, addedDoc.getNumberOfPages(), pdfDocument); - addedDoc.close(); - } - pdfDocument.close(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/src/main/java/net/faustedition/gen/PdfMergerBoxBased.java b/src/main/java/net/faustedition/gen/PdfMergerBoxBased.java deleted file mode 100644 index 0f369c2..0000000 --- 
a/src/main/java/net/faustedition/gen/PdfMergerBoxBased.java +++ /dev/null @@ -1,32 +0,0 @@ -package net.faustedition.gen; - -import org.apache.pdfbox.io.MemoryUsageSetting; -import org.apache.pdfbox.multipdf.PDFMergerUtility; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; - -public class PdfMergerBoxBased { - - - public static void main(String[] argv) { - final PDFMergerUtility merger = new PDFMergerUtility(); - merger.setDestinationFileName("/tmp/2_H_box.pdf"); - final Path root = Paths.get("/home/tv/git/faust-gen/target/www/transcript/diplomatic/2_H"); - for (int i = 1; i < 390; i++) { - try { - merger.addSource(root.resolve(String.format("page_%d.pdf", i)).toFile()); - } catch (FileNotFoundException e) { - System.err.println("file not found" + e.getMessage()); - } - } - try { - merger.mergeDocuments(MemoryUsageSetting.setupMainMemoryOnly()); - } catch (IOException e) { - e.printStackTrace(); - } - } - -}