From defc515d6efc439ea3a9535d8355d14c6b6432e5 Mon Sep 17 00:00:00 2001
From: Horia Chiorean <hchiorea@redhat.com>
Date: Wed, 7 Dec 2016 17:36:49 +0200
Subject: [PATCH] MODE-2648 Upgrades Apache Tika to 1.14 and PdfBox to 2.0.3 to
 avoid CVEs

The major update to PdfBox triggers some other changes around the PDF
sequencer: previous versions allowed some metadata to be extracted for
encrypted PDFs, while the current version doesn't even allow reading the
stream. As such, the behavior of the sequencer has been updated to reflect
this.

---
 boms/modeshape-bom-embedded/pom.xml           |   4 +-
 .../org/apache/tika/{1.12 => 1.14}/module.xml |  26 +++--
 .../modeshape/sequencer/pdf/main/module.xml   |   5 +-
 .../extractor/tika/TikaTextExtractor.java     | 106 +++++++++---------
 .../resources/assemblies/jboss-wf-dist.xml    |  13 +--
 modeshape-parent/pom.xml                      |  60 +++++-----
 .../sequencer/pdf/PdfBasicMetadata.java       |   9 +-
 .../sequencer/pdf/PdfMetadataSequencer.java   |  20 +++-
 .../pdf/PdfMetadataSequencerTest.java         |  31 ++---
 9 files changed, 137 insertions(+), 137 deletions(-)
 rename deploy/jbossas/kit/jboss-wf/org/apache/tika/{1.12 => 1.14}/module.xml (67%)
diff --git a/boms/modeshape-bom-embedded/pom.xml b/boms/modeshape-bom-embedded/pom.xml
index 18ace5e4e6..dd1044a81c 100644
--- a/boms/modeshape-bom-embedded/pom.xml
+++ b/boms/modeshape-bom-embedded/pom.xml
@@ -63,7 +63,7 @@
         <version.org.mongodb.mongo-java-driver>2.14.0</version.org.mongodb.mongo-java-driver>
         <version.com.datastax.cassandra>2.0.0-rc2</version.com.datastax.cassandra>
         <version.com.amazonaws>1.11.24</version.com.amazonaws>
-        <version.org.apache.tika>1.12</version.org.apache.tika>
+        <version.org.apache.tika>1.14</version.org.apache.tika>
         <version.org.eclipse.equinox.common>3.3.0-v20070426</version.org.eclipse.equinox.common>
         <version.org.eclipse.jdt.core>3.3.0-v_771</version.org.eclipse.jdt.core>
         <version.org.eclipse.core.resources>3.3.0-v20070604</version.org.eclipse.core.resources>
@@ -81,7 +81,7 @@
         <version.org.eclipse.emf.ecore-xmi>2.4.1</version.org.eclipse.emf.ecore-xmi>
         <version.org.javassist>3.18.1-GA</version.org.javassist>
         <version.org.jaudiotagger>2.0.3</version.org.jaudiotagger>
-        <version.org.apache.poi>3.13</version.org.apache.poi>
+        <version.org.apache.poi>3.15</version.org.apache.poi>
         <version.com.beust.jcommander>1.5</version.com.beust.jcommander>
         <version.wsdl4j>1.6.3</version.wsdl4j>
         <version.org.apache.lucene>6.0.0</version.org.apache.lucene>
diff --git a/deploy/jbossas/kit/jboss-wf/org/apache/tika/1.12/module.xml b/deploy/jbossas/kit/jboss-wf/org/apache/tika/1.14/module.xml
similarity index 67%
rename from deploy/jbossas/kit/jboss-wf/org/apache/tika/1.12/module.xml
rename to deploy/jbossas/kit/jboss-wf/org/apache/tika/1.14/module.xml
index 0411e3867a..870890dd58 100644
--- a/deploy/jbossas/kit/jboss-wf/org/apache/tika/1.12/module.xml
+++ b/deploy/jbossas/kit/jboss-wf/org/apache/tika/1.14/module.xml
@@ -16,32 +16,34 @@
   ~ See the License for the specific language governing permissions and
   ~ limitations under the License.
 -->
-<module xmlns="urn:jboss:module:1.3" name="org.apache.tika" slot="1.12">
+<module xmlns="urn:jboss:module:1.3" name="org.apache.tika" slot="${version.org.apache.tika}">
     <resources>
-        <resource-root path="tika-core-1.12.jar" />
-        <resource-root path="tika-parsers-1.12.jar" />
+        <resource-root path="tika-core-${version.org.apache.tika}.jar" />
+        <resource-root path="tika-parsers-${version.org.apache.tika}.jar" />
         <!--All the following are specific to Tika Should the tika version change, those need to change as well-->
         <resource-root path="asm-5.0.4.jar" />
         <resource-root path="apache-mime4j-core-0.7.2.jar" />
         <resource-root path="apache-mime4j-dom-0.7.2.jar" />
-        <resource-root path="commons-codec-1.9.jar" />
-        <resource-root path="commons-compress-1.10.jar" />
+        <resource-root path="commons-codec-1.10.jar" />
+        <resource-root path="commons-compress-1.12.jar" />
+        <resource-root path="commons-collections4-4.1.jar" />
+        <resource-root path="commons-logging-1.2.jar" />
         <resource-root path="commons-exec-1.3.jar" />
-        <resource-root path="commons-io-2.4.jar" />
-        <resource-root path="commons-logging-1.1.1.jar" />
-        <resource-root path="fontbox-1.8.10.jar" />
-        <resource-root path="jempbox-1.8.10.jar" />
+        <resource-root path="commons-io-2.5.jar" />
+        <resource-root path="fontbox-${version.org.apache.pdfbox}.jar" />
+        <resource-root path="jempbox-1.8.12.jar" />
         <resource-root path="juniversalchardet-1.0.3.jar" />
-        <resource-root path="pdfbox-1.8.10.jar" />
+        <resource-root path="pdfbox-${version.org.apache.pdfbox}.jar" />
         <resource-root path="poi-${version.org.apache.poi}.jar" />
         <resource-root path="poi-ooxml-${version.org.apache.poi}.jar" />
         <resource-root path="poi-ooxml-schemas-${version.org.apache.poi}.jar" />
         <resource-root path="poi-scratchpad-${version.org.apache.poi}.jar" />
         <resource-root path="xmlbeans-2.6.0.jar" />
         <resource-root path="tagsoup-1.2.1.jar" />
-        <resource-root path="vorbis-java-core-0.6.jar" />
-        <resource-root path="vorbis-java-tika-0.6.jar" />
+        <resource-root path="vorbis-java-core-0.8.jar" />
+        <resource-root path="vorbis-java-tika-0.8.jar" />
         <resource-root path="xz-1.5.jar" />
+        <resource-root path="jackson-core-2.8.1.jar" />
     </resources>
     <dependencies>
         <module name="javax.api" export="true"/>
diff --git a/deploy/jbossas/kit/jboss-wf/org/modeshape/sequencer/pdf/main/module.xml b/deploy/jbossas/kit/jboss-wf/org/modeshape/sequencer/pdf/main/module.xml
index dee389e69c..70dec6f43f 100644
--- a/deploy/jbossas/kit/jboss-wf/org/modeshape/sequencer/pdf/main/module.xml
+++ b/deploy/jbossas/kit/jboss-wf/org/modeshape/sequencer/pdf/main/module.xml
@@ -19,13 +19,12 @@
     <resources>
         <resource-root path="modeshape-sequencer-pdf-${project.version}.jar" />
         <resource-root path="pdfbox-${version.org.apache.pdfbox}.jar" />
-        <resource-root path="jempbox-${version.org.apache.pdfbox}.jar" />
         <resource-root path="xmpbox-${version.org.apache.pdfbox}.jar" />
-        <resource-root path="fontbox-${version.org.apache.pdfbox}.jar" />
-        <resource-root path="commons-logging-1.1.1.jar" />
+        <resource-root path="commons-logging-1.2.jar" />
     </resources>
 
     <dependencies>
+        <module name="javax.xml.bind.api"/>
         <module name="org.modeshape.jcr.api"/>
         <module name="org.modeshape.common"/>
     </dependencies>
diff --git a/extractors/modeshape-extractor-tika/src/main/java/org/modeshape/extractor/tika/TikaTextExtractor.java b/extractors/modeshape-extractor-tika/src/main/java/org/modeshape/extractor/tika/TikaTextExtractor.java
index 0f0a2879a1..ef55d1f922 100644
--- a/extractors/modeshape-extractor-tika/src/main/java/org/modeshape/extractor/tika/TikaTextExtractor.java
+++ b/extractors/modeshape-extractor-tika/src/main/java/org/modeshape/extractor/tika/TikaTextExtractor.java
@@ -16,15 +16,15 @@
 package org.modeshape.extractor.tika;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.atomic.AtomicReference;
 import javax.jcr.RepositoryException;
+import org.apache.tika.config.ServiceLoader;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -83,9 +83,9 @@ public class TikaTextExtractor extends TextExtractor {
             MediaType.application("x-tar"), MediaType.application("zip"), MediaType.application("vnd.teiid.vdb"),
             MediaType.image("*"), MediaType.audio("*"), MediaType.video("*"));
 
-    private final Set<MediaType> excludedMediaTypes = new HashSet<MediaType>();
-    private final Set<MediaType> includedMediaTypes = new HashSet<MediaType>();
-    private final Set<MediaType> parserSupportedMediaTypes = new HashSet<MediaType>();
+    private final Set<MediaType> excludedMediaTypes = new HashSet<>();
+    private final Set<MediaType> includedMediaTypes = new HashSet<>();
+    private final Set<MediaType> parserSupportedMediaTypes = new HashSet<>();
 
     /**
      * The write limit for the Tika parser, representing the maximum number of characters that should be extracted by the
@@ -93,8 +93,7 @@ public class TikaTextExtractor extends TextExtractor {
      */
     private Integer writeLimit;
 
-    private final Lock initLock = new ReentrantLock();
-    private DefaultParser parser;
+    private final AtomicReference<DefaultParser> parser = new AtomicReference<>();
 
     /**
      * No-arg constructor is required because this is instantiated by reflection.
@@ -130,33 +129,30 @@ public void extractFrom( final Binary binary,
 
         final DefaultParser parser = initialize();
         final Integer writeLimit = this.writeLimit;
-        processStream(binary, new BinaryOperation<Object>() {
-            @Override
-            public Object execute( InputStream stream ) throws Exception {
-                Metadata metadata = prepareMetadata(binary, context);
-                //TODO author=Horia Chiorean date=1/30/13 description=//TIKA 1.2 TXTParser seems to have a bug, always adding 1 ignorable whitespace to the actual chars to be parsed
-                //https://issues.apache.org/jira/browse/TIKA-1069
-                ContentHandler textHandler = writeLimit == null ? new BodyContentHandler() : new BodyContentHandler(writeLimit + 1);
-                try {
-                    LOGGER.debug("Using TikaTextExtractor to extract text");
-                    // Parse the input stream ...
-                    parser.parse(stream, textHandler, metadata, new ParseContext());
-                } catch (SAXException sae) {
-                    LOGGER.warn(TikaI18n.parseExceptionWhileExtractingText, sae.getMessage());
-                } catch (NoClassDefFoundError ncdfe) {
-                    LOGGER.warn(TikaI18n.warnNoClassDefFound, ncdfe.getMessage());
-                } catch (Throwable e) {
-                    LOGGER.error(e, TikaI18n.errorWhileExtractingTextFrom, e.getMessage());
-                } finally {
-                    // Record all of the text in the body ...
-                    String text = textHandler.toString().trim();
-                    if (!StringUtil.isBlank(text)) {
-                        output.recordText(text);
-                        LOGGER.debug("TikaTextExtractor found text: " + text);
-                    }
+        processStream(binary, stream -> {
+            Metadata metadata = prepareMetadata(binary, context);
+            //TODO author=Horia Chiorean date=1/30/13 description=//TIKA 1.2 TXTParser seems to have a bug, always adding 1 ignorable whitespace to the actual chars to be parsed
+            //https://issues.apache.org/jira/browse/TIKA-1069
+            ContentHandler textHandler = writeLimit == null ? new BodyContentHandler() : new BodyContentHandler(writeLimit + 1);
+            try {
+                LOGGER.debug("Using TikaTextExtractor to extract text");
+                // Parse the input stream ...
+                parser.parse(stream, textHandler, metadata, new ParseContext());
+            } catch (SAXException sae) {
+                LOGGER.warn(TikaI18n.parseExceptionWhileExtractingText, sae.getMessage());
+            } catch (NoClassDefFoundError ncdfe) {
+                LOGGER.warn(TikaI18n.warnNoClassDefFound, ncdfe.getMessage());
+            } catch (Throwable e) {
+                LOGGER.error(e, TikaI18n.errorWhileExtractingTextFrom, e.getMessage());
+            } finally {
+                // Record all of the text in the body ...
+                String text = textHandler.toString().trim();
+                if (!StringUtil.isBlank(text)) {
+                    output.recordText(text);
+                    LOGGER.debug("TikaTextExtractor found text: " + text);
                 }
-                return null;
             }
+            return null;
         });
 
     }
@@ -185,36 +181,34 @@ protected final Metadata prepareMetadata( final Binary binary,
         }
         return metadata;
     }
-
+    
     /**
      * This class lazily initializes the {@link DefaultParser} instance.
-     * 
+     *
      * @return the default parser; same as {@link #parser}
      */
     protected DefaultParser initialize() {
-        if (parser == null) {
-            initLock.lock();
-            try {
-                if (parser == null) {
-                    parser = new DefaultParser(this.getClass().getClassLoader());
-                }
-                LOGGER.debug("Initializing Tika Text Extractor");
-                Map<MediaType, Parser> parsers = parser.getParsers();
-                LOGGER.debug("Tika parsers found: {0}",parsers.size());
-                for (MediaType mediaType : parsers.keySet()) {
-                    parserSupportedMediaTypes.add(mediaType);
-                    LOGGER.debug("Tika Text Extractor will support the {0} media-type",mediaType);
-                }
-                convertStringMimeTypesToMediaTypes(getExcludedMimeTypes(), excludedMediaTypes);
-                convertStringMimeTypesToMediaTypes(getIncludedMimeTypes(), includedMediaTypes);
-                LOGGER.debug("Initialized {0}", this);
-            } finally {
-                initLock.unlock();
-            }
+        DefaultParser existing = parser.get(); // fast path: never rebuild the (expensive) parser once it has been published
+        return existing != null ? existing : parser.updateAndGet(current -> current != null ? current : newDefaultParser());
+    }
+    
+    private DefaultParser newDefaultParser() {
+        ServiceLoader serviceLoader = new ServiceLoader(this.getClass().getClassLoader(),
+                                                        (classname, throwable) -> LOGGER.debug(throwable, "error while loading parser for {0}", classname));
+        DefaultParser defaultParser = new DefaultParser(MediaTypeRegistry.getDefaultRegistry(), serviceLoader);
+        LOGGER.debug("Initializing Tika Text Extractor");
+        Map<MediaType, Parser> parsers = defaultParser.getParsers();
+        LOGGER.debug("Tika parsers found: {0}",parsers.size());
+        for (MediaType mediaType : parsers.keySet()) {
+            parserSupportedMediaTypes.add(mediaType);
+            LOGGER.debug("Tika Text Extractor will support the {0} media-type",mediaType);
         }
-        return parser;
+        convertStringMimeTypesToMediaTypes(getExcludedMimeTypes(), excludedMediaTypes);
+        convertStringMimeTypesToMediaTypes(getIncludedMimeTypes(), includedMediaTypes);
+        LOGGER.debug("Initialized {0}", this);
+        return defaultParser;
     }
-
+    
     private void convertStringMimeTypesToMediaTypes(Set<String> mimeTypes, Set<MediaType> mediaTypes) {
         for (String mimeTypeEntry : mimeTypes) {
             //allow each mime type entry to be an array in itself
diff --git a/modeshape-assembly-descriptors/src/main/resources/assemblies/jboss-wf-dist.xml b/modeshape-assembly-descriptors/src/main/resources/assemblies/jboss-wf-dist.xml
index 123b3bec82..7205ee40b5 100644
--- a/modeshape-assembly-descriptors/src/main/resources/assemblies/jboss-wf-dist.xml
+++ b/modeshape-assembly-descriptors/src/main/resources/assemblies/jboss-wf-dist.xml
@@ -179,6 +179,7 @@
                 <include>org.ccil.cowan.tagsoup:*:jar</include>
                 <include>org.gagravarr:*:jar</include>
                 <include>org.ow2.asm:asm:jar</include>
+                <include>com.fasterxml.jackson.core:jackson-core:jar</include>
                 <!-- 
                 The following are not currently included by default; see ModeShape's parent POM
                 <include>org.bouncycastle:*:jar</include>
@@ -263,16 +264,6 @@
                 <include>org:jaudiotagger:jar</include>
             </includes>
         </dependencySet>
-      
-        <dependencySet>
-            <useProjectArtifact>false</useProjectArtifact>
-            <outputDirectory>modules/org/modeshape/sequencer/pdf/main</outputDirectory>
-            <includes>
-                <include>org.modeshape:modeshape-sequencer-pdf:jar</include>
-                <include>org.apache.pdfbox:*:jar</include>
-                <include>commons-logging:commons-logging:jar</include>
-            </includes>
-        </dependencySet> 
        
         <dependencySet>
             <useProjectArtifact>false</useProjectArtifact>
@@ -367,11 +358,13 @@
         </dependencySet>
 
         <dependencySet>
+            <useProjectArtifact>false</useProjectArtifact>
             <outputDirectory>modules/org/modeshape/sequencer/pdf/main</outputDirectory>
             <includes>
                 <include>org.modeshape:modeshape-sequencer-pdf:jar</include>
                 <include>org.apache.pdfbox:pdfbox:jar</include>
                 <include>org.apache.pdfbox:xmpbox:jar</include>
+                <include>commons-logging:commons-logging:jar</include>
             </includes>
         </dependencySet>
 
diff --git a/modeshape-parent/pom.xml b/modeshape-parent/pom.xml
index eaac797f53..ddedbcfe76 100644
--- a/modeshape-parent/pom.xml
+++ b/modeshape-parent/pom.xml
@@ -145,7 +145,7 @@
         <version.com.googlecode.sardine>146</version.com.googlecode.sardine>
         <version.org.javassist>3.18.1-GA</version.org.javassist>
         <version.org.jaudiotagger>2.0.3</version.org.jaudiotagger>
-        <version.org.apache.poi>3.13</version.org.apache.poi>  <!-- Try to match what's in Tika -->
+        <version.org.apache.poi>3.15</version.org.apache.poi>  <!-- Try to match what's in Tika -->
         <version.com.beust.jcommander>1.5</version.com.beust.jcommander>
         <version.wsdl4j>1.6.3</version.wsdl4j>
         <version.de.undercouch.bson4jackson>1.1.2</version.de.undercouch.bson4jackson>
@@ -170,7 +170,7 @@
         <version.org.eclipse.emf.ecore-change>2.2.3</version.org.eclipse.emf.ecore-change>
         <version.org.eclipse.emf.ecore-xmi>2.4.1</version.org.eclipse.emf.ecore-xmi>
         <!-- nightmare end -->        
-        <version.org.apache.tika>1.12</version.org.apache.tika>
+        <version.org.apache.tika>1.14</version.org.apache.tika>
         <version.com.drewnoakes.metadata-extractor>2.6.2</version.com.drewnoakes.metadata-extractor>
         <version.org.apache.lucene>6.0.0</version.org.apache.lucene>
         <version.io.humble>0.2.1</version.io.humble>
@@ -184,7 +184,7 @@
         <version.org.jboss.ironjacamar>1.0.13.Final</version.org.jboss.ironjacamar>
         <version.com.datastax.cassandra>2.0.0-rc2</version.com.datastax.cassandra>
         <version.org.apache.cassandra>2.0.3</version.org.apache.cassandra>
-        <version.org.apache.pdfbox>1.8.10</version.org.apache.pdfbox>
+        <version.org.apache.pdfbox>2.0.3</version.org.apache.pdfbox>
         
         <version.com.h2>1.4.191</version.com.h2>
         <version.postgresql.9>9.2-1002.jdbc4</version.postgresql.9>
@@ -1695,11 +1695,11 @@
                     <exclusion>
                         <groupId>com.healthmarketscience.jackcess</groupId>
                         <artifactId>jackcess</artifactId>
-                    </exclusion> 
+                    </exclusion>
                     <exclusion>
                         <groupId>com.healthmarketscience.jackcess</groupId>
                         <artifactId>jackcess-encrypt</artifactId>
-                    </exclusion>  
+                    </exclusion>
                     <exclusion>
                         <groupId>org.codelibs</groupId>
                         <artifactId>jhighlight</artifactId>
@@ -1707,27 +1707,31 @@
                     <exclusion>
                         <groupId>org.apache.cxf</groupId>
                         <artifactId>cxf-rt-rs-client</artifactId>
-                    </exclusion>  
+                    </exclusion>
                     <exclusion>
                         <groupId>org.apache.opennlp</groupId>
                         <artifactId>opennlp-tools</artifactId>
-                    </exclusion> 
+                    </exclusion>
                     <exclusion>
                         <groupId>org.apache.opennlp</groupId>
                         <artifactId>opennlp-tools</artifactId>
-                    </exclusion> 
+                    </exclusion>
                     <exclusion>
                         <groupId>org.json</groupId>
                         <artifactId>json</artifactId>
-                    </exclusion>   
+                    </exclusion>
                     <exclusion>
                         <groupId>com.google.code.gson</groupId>
                         <artifactId>gson</artifactId>
-                    </exclusion>   
+                    </exclusion>
+                    <exclusion>
+                        <groupId>com.googlecode.json-simple</groupId>
+                        <artifactId>json-simple</artifactId>
+                    </exclusion>
                     <exclusion>
                         <groupId>org.apache.sis.core</groupId>
                         <artifactId>sis-utility</artifactId>
-                    </exclusion>  
+                    </exclusion>
                     <exclusion>
                         <groupId>org.apache.sis.core</groupId>
                         <artifactId>sis-metadata</artifactId>
@@ -1735,11 +1739,11 @@
                     <exclusion>
                         <groupId>org.apache.sis.storage</groupId>
                         <artifactId>sis-netcdf</artifactId>
-                    </exclusion>  
+                    </exclusion>
                     <exclusion>
                         <groupId>org.opengis</groupId>
                         <artifactId>geoapi</artifactId>
-                    </exclusion>  
+                    </exclusion>
                     <exclusion>
                         <groupId>com.googlecode.mp4parser</groupId>
                         <artifactId>isoparser</artifactId>
@@ -1747,7 +1751,7 @@
                     <exclusion>
                         <groupId>com.github.junrar</groupId>
                         <artifactId>junrar</artifactId>
-                    </exclusion> 
+                    </exclusion>
                     <exclusion>
                         <groupId>org.apache.commons</groupId>
                         <artifactId>commons-csv</artifactId>
@@ -1767,15 +1771,15 @@
                     <exclusion>
                         <groupId>edu.ucar</groupId>
                         <artifactId>netcdf4</artifactId>
-                    </exclusion>   
+                    </exclusion>
                     <exclusion>
                         <groupId>edu.ucar</groupId>
                         <artifactId>grib</artifactId>
-                    </exclusion>    
+                    </exclusion>
                     <exclusion>
                         <groupId>edu.ucar</groupId>
                         <artifactId>cdm</artifactId>
-                    </exclusion>  
+                    </exclusion>
                     <exclusion>
                         <groupId>edu.ucar</groupId>
                         <artifactId>httpservices</artifactId>
@@ -1816,12 +1820,12 @@
                     We exclude them by default. 
                     -->
                     <exclusion>
-                      <groupId>org.bouncycastle</groupId>
-                      <artifactId>bcmail-jdk15on</artifactId>
+                        <groupId>org.bouncycastle</groupId>
+                        <artifactId>bcmail-jdk15on</artifactId>
                     </exclusion>
                     <exclusion>
-                      <groupId>org.bouncycastle</groupId>
-                      <artifactId>bcprov-jdk15on</artifactId>
+                        <groupId>org.bouncycastle</groupId>
+                        <artifactId>bcprov-jdk15on</artifactId>
                     </exclusion>
                     <!--
                     Outlook PST files for emails are likely not used,
@@ -1832,13 +1836,19 @@
                         <artifactId>java-libpst</artifactId>
                     </exclusion>
                     <!--
-                    Matlab files are likely not used,
-                    so exclude this library by default.
-                    -->
+                   Matlab files are likely not used, so exclude this library by default.
+                   -->
                     <exclusion>
-                        <groupId>net.sourceforge.jmatio</groupId>
+                        <groupId>org.tallison</groupId>
                         <artifactId>jmatio</artifactId>
                     </exclusion>
+                    <!--
+                        PDF box tools not required
+                    -->
+                    <exclusion>
+                        <groupId>org.apache.pdfbox</groupId>
+                        <artifactId>pdfbox-tools</artifactId>
+                    </exclusion>
                 </exclusions>
             </dependency>
             <!--
diff --git a/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfBasicMetadata.java b/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfBasicMetadata.java
index c35d29f5a9..5c0b59ff9b 100644
--- a/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfBasicMetadata.java
+++ b/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfBasicMetadata.java
@@ -20,16 +20,15 @@
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.List;
-
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
 import org.apache.pdfbox.pdmodel.PDDocumentInformation;
 import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDPageable;
 import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
 import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
 import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
 import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationFileAttachment;
+import org.apache.pdfbox.printing.PDFPageable;
 
 /**
  * Utility for extracting Document Information Directory metadata from PDF files.
@@ -69,7 +68,7 @@ public PdfBasicMetadata( InputStream inputStream ) {
     public boolean check() throws Exception {
         try (PDDocument document = PDDocument.load(in)) {
             PDDocumentCatalog catalog = document.getDocumentCatalog();
-            PDPageable pageable = new PDPageable(document);
+            PDFPageable pageable = new PDFPageable(document);
             PageFormat firstPage = pageable.getPageFormat(0);
 
             encrypted = document.isEncrypted();
@@ -97,7 +96,7 @@ public boolean check() throws Exception {
 
             // extract all attached files from all pages
             int pageNumber = 0;
-            for (Object page : catalog.getAllPages()) {
+            for (Object page : catalog.getPages()) {
                 pageNumber += 1;
                 PdfPageMetadata pageMetadata = new PdfPageMetadata();
                 pageMetadata.setPageNumber(pageNumber);
@@ -114,7 +113,7 @@ public boolean check() throws Exception {
                         attachmentMetadata.setCreationDate(embeddedFile.getCreationDate());
                         attachmentMetadata.setModificationDate(embeddedFile.getModDate());
                         attachmentMetadata.setMimeType(embeddedFile.getSubtype());
-                        attachmentMetadata.setData(embeddedFile.getByteArray());
+                        attachmentMetadata.setData(embeddedFile.toByteArray());
 
                         pageMetadata.addAttachment(attachmentMetadata);
                     }
diff --git a/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfMetadataSequencer.java b/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfMetadataSequencer.java
index 531cd35f6f..d659ccc84e 100644
--- a/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfMetadataSequencer.java
+++ b/sequencers/modeshape-sequencer-pdf/src/main/java/org/modeshape/sequencer/pdf/PdfMetadataSequencer.java
@@ -106,11 +106,23 @@ public boolean execute( Property inputProperty,
         Binary binaryValue = inputProperty.getBinary();
         CheckArg.isNotNull(binaryValue, "binary");
         Node sequencedNode = getPdfMetadataNode(outputNode);
-        boolean isBasicMetadataParsed = processBasicMetadata(sequencedNode, binaryValue);
-        if (isBasicMetadataParsed) {
-            processXMPMetadata(sequencedNode, binaryValue);
+        try {
+            if (processBasicMetadata(sequencedNode, binaryValue)) {
+                processXMPMetadata(sequencedNode, binaryValue);
+                return true;
+            } else {
+                getLogger().warn("Ignoring pdf from node {0} because basic metadata cannot be extracted",
+                                 inputProperty.getParent().getPath());
+                return false;
+            }
+        } catch (java.lang.NoClassDefFoundError ncdfe) {
+            if (ncdfe.getMessage() != null && ncdfe.getMessage().toLowerCase().contains("bouncycastle")) { // message may be null
+                getLogger().warn("Ignoring pdf from node {0} because it's encrypted and encrypted PDFs are not supported", 
+                                 inputProperty.getParent().getPath());
+                return false;
+            }
+            throw ncdfe;
         }
-        return true;
     }
 
     private boolean processBasicMetadata( Node sequencedNode,
diff --git a/sequencers/modeshape-sequencer-pdf/src/test/java/org/modeshape/sequencer/pdf/PdfMetadataSequencerTest.java b/sequencers/modeshape-sequencer-pdf/src/test/java/org/modeshape/sequencer/pdf/PdfMetadataSequencerTest.java
index def125409e..fa3de70049 100644
--- a/sequencers/modeshape-sequencer-pdf/src/test/java/org/modeshape/sequencer/pdf/PdfMetadataSequencerTest.java
+++ b/sequencers/modeshape-sequencer-pdf/src/test/java/org/modeshape/sequencer/pdf/PdfMetadataSequencerTest.java
@@ -17,37 +17,35 @@
 
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertThat;
+import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ATTACHMENT_NODE;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.AUTHOR;
+import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.CREATION_DATE;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.CREATOR;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ENCRYPTED;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.KEYWORDS;
+import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.MODIFICATION_DATE;
+import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.NAME;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ORIENTATION;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_COUNT;
+import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_NODE;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_NUMBER;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PRODUCER;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.SUBJECT;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.TITLE;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.VERSION;
 import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.XMP_NODE;
-import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.PAGE_NODE;
-import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.ATTACHMENT_NODE;
-import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.NAME;
-import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.CREATION_DATE;
-import static org.modeshape.sequencer.pdf.PdfMetadataLexicon.MODIFICATION_DATE;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.BASE_URL;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.CREATE_DATE;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.CREATOR_TOOL;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.IDENTIFIER;
+import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.LABEL;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.METADATA_DATE;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.MODIFY_DATE;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.NICKNAME;
 import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.RATING;
-import static org.modeshape.sequencer.pdf.XmpMetadataLexicon.LABEL;
 
 import java.util.Calendar;
-
 import javax.jcr.Node;
-
 import org.junit.Test;
 import org.modeshape.jcr.api.JcrConstants;
 import org.modeshape.jcr.sequencer.AbstractSequencerTest;
@@ -110,24 +108,17 @@ public void shouldSequenceXMPMetadata() throws Exception {
     }
 
     @Test
-    public void shouldSequenceEncryptedPdf() throws Exception {
+    public void shouldNotSequenceEncryptedPdf() throws Exception {
         // GIVEN
         String filename = "sample-encrypted.pdf";
 
         // WHEN
         createNodeWithContentFromFile(filename, filename);
 
-        // THEN
-        Node sequencedNode = getOutputNode(rootNode, "sequenced/pdf/" + filename);
-        assertThat(sequencedNode.getProperty(JcrConstants.JCR_MIME_TYPE).getString(), is("application/pdf"));
-        assertThat(sequencedNode.getProperty(PAGE_COUNT).getLong(), is(2L));
-        assertThat(sequencedNode.getProperty(ORIENTATION).getString(), is("portrait"));
-        assertThat(sequencedNode.getProperty(ENCRYPTED).getBoolean(), is(true));
-        assertThat(sequencedNode.getProperty(VERSION).getString(), is("1.4"));
-
-        assertThat(sequencedNode.hasProperty("Author"), is(false));
-
-        assertThat(sequencedNode.hasNode(XMP_NODE), is(false));
+        // as of MODE-2648 and PdfBox 2.x encrypted PDFs are not parseable since the API and dependencies have changed
+        // it turns out that prior to 2.x some basic metadata was still available; this is not the case anymore
+        Thread.sleep(100);
+        assertNoNode("/sequenced/pdf/" + filename);
     }
 
     @Test