Skip to content

Commit

Permalink
Add support for protected PDFs and minor tweaks at PDFContentImporter
Browse files Browse the repository at this point in the history
 * add BouncyCastle library
 * remove BouncyCastle installation warning string
 * provide meta information on encrypted.pdf
 * add test file write-protected.pdf
 * Fix BibtexEntryAssert.assertEquals(Class, String, BibEntry)
 * PDFContentImporter:
  * Do not fill review field any more - this was used for debugging
  * Do not write empty keyword field
  • Loading branch information
koppor committed Mar 11, 2016
1 parent c61bdbe commit 91c8af0
Show file tree
Hide file tree
Showing 34 changed files with 164 additions and 125 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ to [sourceforge feature requests](https://sourceforge.net/p/jabref/features/) by
- Fixed [#855](https://github.com/JabRef/jabref/issues/856): Fixed OpenOffice Manual connect - Clicking on browse does now work correctly
- Fixed [#649](https://github.com/JabRef/jabref/issues/649): Key bindings are now working in the preview panel
- Fixed [#410](https://github.com/JabRef/jabref/issues/410): Find unlinked files no longer freezes when extracting entry from PDF content
- Fixed [#935](https://github.com/JabRef/jabref/issues/935): PDFs that are readable but protected against editing are now handled by the XMP parser and by the importer that generates a BibTeX entry from the PDF content.

### Removed
- Fixed [#627](https://github.com/JabRef/jabref/issues/627): The pdf field is removed from the export formats, use the file field
Expand Down
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ dependencies {
compile 'org.apache.pdfbox:pdfbox:1.8.11'
compile 'org.apache.pdfbox:fontbox:1.8.11'
compile 'org.apache.pdfbox:jempbox:1.8.11'
compile 'org.bouncycastle:bcprov-jdk15on:1.54'

compile 'commons-cli:commons-cli:1.3.1'

Expand Down
5 changes: 5 additions & 0 deletions external-libraries.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ Project: Apache PDFBox
URL: http://pdfbox.apache.org
License: Apache 2.0

Id: org.bouncycastle:bcprov-jdk15on
Project: The Legion of the Bouncy Castle
URL: https://www.bouncycastle.org/
License: MIT

Id: org.jsoup:jsoup
Project: jsoup
URL: https://github.com/jhy/jsoup/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
import net.sf.jabref.importer.ImportInspector;
import net.sf.jabref.importer.OutputPrinter;
import net.sf.jabref.importer.fetcher.DOItoBibTeXFetcher;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.logic.util.DOI;
import net.sf.jabref.logic.xmp.EncryptionNotSupportedException;
import net.sf.jabref.logic.xmp.XMPUtil;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.BibtexEntryTypes;
import net.sf.jabref.model.entry.EntryType;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand All @@ -17,6 +20,7 @@
import java.io.InputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -182,12 +186,7 @@ public boolean isRecognizedFormat(InputStream in) throws IOException {
public List<BibEntry> importEntries(InputStream in, OutputPrinter status) throws IOException {
final ArrayList<BibEntry> result = new ArrayList<>(1);

try (PDDocument document = PDDocument.load(in)) {
if (document.isEncrypted()) {
LOGGER.info("Encrypted documents are not supported");
return result;
}

try (PDDocument document = XMPUtil.loadWithAutomaticDecryption(in)) {
String firstPageContents = getFirstPageContents(document);

Optional<DOI> doi = DOI.findInText(firstPageContents);
Expand Down Expand Up @@ -447,7 +446,7 @@ public void addEntry(BibEntry entry) {
if (abstractT != null) {
entry.setField("abstract", abstractT);
}
if (keywords != null) {
if (StringUtils.isNotEmpty(keywords)) {
entry.setField("keywords", keywords);
}
if (title != null) {
Expand Down Expand Up @@ -478,9 +477,10 @@ public void addEntry(BibEntry entry) {
entry.setField("publisher", publisher);
}

entry.setField("review", firstPageContents);

result.add(entry);
} catch (EncryptionNotSupportedException e) {
LOGGER.info("Decryption not supported");
return Collections.EMPTY_LIST;
}
return result;
}
Expand Down
1 change: 1 addition & 0 deletions src/main/java/net/sf/jabref/logic/util/io/XMLUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* Currently used for debugging only
*/
public class XMLUtil {

private static final Log LOGGER = LogFactory.getLog(XMLUtil.class);

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright (C) 2003-2011 JabRef contributors.
/* Copyright (C) 2003-2016 JabRef contributors.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
Expand All @@ -18,7 +18,5 @@
import java.io.IOException;

public class EncryptionNotSupportedException extends IOException {
public EncryptionNotSupportedException(String string) {
super(string);
}
// no additional information needed
}
57 changes: 41 additions & 16 deletions src/main/java/net/sf/jabref/logic/xmp/XMPUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,13 @@
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.encryption.BadSecurityHandlerException;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.w3c.dom.Document;

/**
Expand Down Expand Up @@ -120,6 +123,39 @@ public static List<BibEntry> readXMP(File file) throws IOException {
return res;
}

public static PDDocument loadWithAutomaticDecryption(InputStream inputStream)
throws IOException, EncryptionNotSupportedException {
PDDocument doc = PDDocument.load(inputStream);
if (doc.isEncrypted()) {
// try the empty string as user password
StandardDecryptionMaterial sdm = new StandardDecryptionMaterial("");
try {
doc.openProtection(sdm);
} catch (BadSecurityHandlerException | CryptographyException e) {
LOGGER.error("Cannot handle encrypted PDF: " + e.getMessage());
throw new EncryptionNotSupportedException();
} catch (NoClassDefFoundError e) {
// This is to avoid following exception:
// Exception in thread "JabRef CachedThreadPool" java.lang.NoClassDefFoundError: org/bouncycastle/jce/provider/BouncyCastleProvider
// at org.apache.pdfbox.pdmodel.PDDocument.openProtection(PDDocument.java:1611)
// at net.sf.jabref.logic.xmp.XMPUtil.loadWithAutomaticDecryption(XMPUtil.java:133)
// This exception occurs if JabRef is compiled without 'org.bouncycastle:bcprov-jdk15on' (meaning, without the BouncyCastle library), which may happen in some countries not allowing cryptography.
// See for instance http://www.bouncycastle.org/wiki/display/JA1/Frequently+Asked+Questions#FrequentlyAskedQuestions-11.WhatisBouncyCastle%27sexportclassificationintheUnitedStatesofAmerica?
// See also https://sourceforge.net/p/jabref/bugs/1257/ and http://stackoverflow.com/a/2929228/873282
if (e.getMessage().equals("org/bouncycastle/jce/provider/BouncyCastleProvider")) {
LOGGER.warn(
"Java Bouncy Castle library not found. This might have been removed due redistribution restrictions. Please download and install it. For more information see http://www.bouncycastle.org/.");
// We convert it to a EncryptionNotSupportedException as this is handled properly by the caller
throw new EncryptionNotSupportedException();
} else {
// we really cannot deal with it
throw e;
}
}
}
return doc;
}

/**
* Try to read the given BibTexEntry from the XMP-stream of the given
* inputstream containing a PDF-file.
Expand All @@ -138,11 +174,7 @@ public static List<BibEntry> readXMP(InputStream inputStream)

List<BibEntry> result = new LinkedList<>();

try (PDDocument document = PDDocument.load(inputStream)) {
if (document.isEncrypted()) {
throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
}

try (PDDocument document = loadWithAutomaticDecryption(inputStream)) {
XMPMetadata meta = XMPUtil.getXMPMetadata(document);

// If we did not find any XMP metadata, search for non XMP metadata
Expand Down Expand Up @@ -509,13 +541,8 @@ public static String toXMP(Collection<BibEntry> bibtexEntries,
* @throws IOException
*/
private static XMPMetadata readRawXMP(InputStream inputStream) throws IOException {
try (PDDocument document = PDDocument.load(inputStream)) {
if (document.isEncrypted()) {
throw new EncryptionNotSupportedException("Error: Cannot read metadata from encrypted document.");
}

try (PDDocument document = loadWithAutomaticDecryption(inputStream)) {
return XMPUtil.getXMPMetadata(document);

}
}

Expand Down Expand Up @@ -1040,8 +1067,7 @@ public static void writeXMP(File file,

try (PDDocument document = PDDocument.load(file.getAbsoluteFile())) {
if (document.isEncrypted()) {
throw new EncryptionNotSupportedException(
"Error: Cannot add metadata to encrypted document.");
throw new EncryptionNotSupportedException();
}

if (writePDFInfo && (resolvedEntries.size() == 1)) {
Expand Down Expand Up @@ -1087,10 +1113,9 @@ public static void writeXMP(File file,
try {
document.save(file.getAbsolutePath());
} catch (COSVisitorException e) {
throw new TransformerException("Could not write XMP-metadata: "
+ e.getLocalizedMessage());
LOGGER.debug("Could not write XMP metadata", e);
throw new TransformerException("Could not write XMP metadata: " + e.getLocalizedMessage(), e);
}

}
}

Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_da.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1281,7 +1281,6 @@ You_must_select_either_a_valid_style_file,_or_use_a_default_style.=Du_skal_enten
Show=Vis

This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=Dette_er_et_enkelt_klip-og-inds\u00e6t-vindue._Indl\u00e6s_eller_inds\u00e6t_f\u00f8rst_tekst_i_tekstfeltet.<br>Derefter_kan_du_markere_tekst_og_tildele_den_til_BibTeX-felter.
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=Java_Bouncy_Castle-bibliotek_ikke_tilg\u00e6ngeligt._For_at_f\u00e5_adgang_til_dette_kan_du_installere_det._For_mere_information,_se_http://www.bouncycastle.org/.

This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=Denne_funktion_genererer_en_ny_database_baseret_p\u00e5,_hvilke_poster_der_er_brugt_i_et_eksisterende_LaTeX-dokument.
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=Du_skal_v\u00e6lge_en_af_de_\u00e5bne_databaser,_hvor_enheder_skal_hentes_fra,_udover_AUX-filen_genereret_af_LaTeX_n\u00e5r_dokumentet_kompileres.
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_de.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1987,7 +1987,6 @@ You_must_select_either_a_valid_style_file,_or_use_a_default_style.=Sie_m\u00fcss
Show=Zeigen

This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=Dieser_Dialog_erm&ouml;glicht_das_schnelle_Einf&uuml;gen_von_Eintr&auml;gen_aus_normalen_Text._Die_gew&uuml;nschten_Textstellen<br>werden_markiert_und_z.B._durch_Doppelklick_einem_selektierten_BibTeX_Eintrag_zugeordnet.
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=Die_Java_Bouncy_Castle-Bibliothek_wurde_nicht_gefunden._Bitte_laden_Sie_sie_herunter_und_installieren_sie._Mehr_Informationen_unter_http://www.bouncycastle.org/.

This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=Diese_Funktion_erstellt_eine_neue_Datei_basierend_auf_den_Eintr\u00e4gen,_die_von_einem_bestehenden_LaTeX-Dokument_ben\u00f6tigt_werden.
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=Sie_m\u00fcssen_eine_Ihrer_ge\u00f6ffneten_Dateien,_von_denen_Eintr\u00e4ge_genommen_werden_sollen,_sowie_die_AUX-Datei,_die_von_LaTeX_beim_Kompilieren_Ihres_Dokuments_erstellt_wird,_ausw\u00e4hlen.
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1987,7 +1987,6 @@ You_must_select_either_a_valid_style_file,_or_use_a_default_style.=You_must_sele
Show=Show

This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http://www.bouncycastle.org/.
This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.

Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_es.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,6 @@ You_must_select_either_a_valid_style_file,_or_use_one_of_the_default_styles.=Deb
You_must_select_either_a_valid_style_file,_or_use_a_default_style.=Debe_seleccionar_un_archivo_de_estilo_v\u00e1lido_o_usar_uno_estilo_por_defecto.
Show=Mostrar
This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=Este_es_un_di\u00e1logo_de_copia-pega_simple._Primero,_cargue_o_pegue_algo_de_texto_en_el_\u00e1rea_de_entrada_de_texto.<br>Posteriormente,_puede_marcar_texto_y_asignarlo_a_un_campo_BibTeX.
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=No_se_encuentra_la_librer\u00eda_Java_Bouncy_Castle._Por_favor,_desc\u00e1rguela_e_inst\u00e1lela._Para_m\u00e1s_informaci\u00f3n,_vea_http://bouncycastle.org/.
This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=Esta_funcionalidad_genera_una_nueva_base_de_datos_basada_en_las_entradas_que_se_necesitan_en_un_documento_LaTeX_existente.
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=Necesita_seleccionar_una_de_sus_bases_de_datos_abiertas_desde_la_que_se_escoger\u00e1n_entradas,_as\u00ed_como_el_archivo_AUX_generado_por_LaTeX_al_compilar_su_documento.
First_select_entries_to_clean_up.=Seleccione_las_entradas_a_limpiar_en_primer_lugar.
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_fa.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1948,7 +1948,6 @@ You_must_select_either_a_valid_style_file,_or_use_a_default_style.=
Show=

This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=
This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=

Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_fr.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,6 @@ You_must_select_either_a_valid_style_file,_or_use_a_default_style.=Vous_devez_s\
Show=Afficher

This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=Ceci_est_simplement_une_fen&ecirc;tre_de_copier-coller._Commencez_par_charger_ou_coller_du_texte_dans_la_zone_de_saisie_de_texte.<br>Ensuite,_vous_pouvez_s&eacute;lectionner_des_portions_de_texte_et_les_attribuer_&agrave;_des_champs_BibTeX.
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=Biblioth\u00e8que_Java_Bouncy_Castle_library_non_trouv\u00e9._S'il_vous_plait,_t\u00e9l\u00e9chargez_e__installez-l\u00e0._Pour_plus_d'informations,_voir_http://www.bouncycastle.org/.

This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=Cette_fonction_g\u00e9n\u00e8re_une_nouvelle_base_de_donn\u00e9es_bas\u00e9es_sur_les_entr\u00e9es_requises_par_un_document_LaTeX_existant.
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=Vous_devez_s\u00e9lectionnner_une_de_vos_bases_de_donn\u00e9es_ouvertes_\u00e0_partir_de_laquelle_choisir_vos_entre\u00e9es,_ainsi_que_le_fichier_AUX_produit_par_LaTeX_lors_de_la_compilation_du_document.
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_in.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1201,7 +1201,6 @@ You_must_select_either_a_valid_style_file,_or_use_a_default_style.=

Show=
This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=

This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=
Expand Down
1 change: 0 additions & 1 deletion src/main/resources/l10n/JabRef_it.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1298,7 +1298,6 @@ You_must_select_either_a_valid_style_file,_or_use_one_of_the_default_styles.=Sel
You_must_select_either_a_valid_style_file,_or_use_a_default_style.=Selezionare_un_file_di_stile_valido_oppure_utilizzare_uno_stile_predefinito.
Show=Mostra
This_is_a_simple_copy_and_paste_dialog._First_load_or_paste_some_text_into_the_text_input_area.<br>After_that,_you_can_mark_text_and_assign_it_to_a_BibTeX_field.=Questo_\u00e8_un_semplice_dialogo_di_copia_e_incolla._Prima_carica_o_incolla_il_testo_nell'area_di_inserimento_di_testo.<BR>Quindi_\u00e8_possibile_selezionare_parti_del_testo_e_assegnarle_ai_campi_BibTeX.
Java_Bouncy_Castle_library_not_found._Please_download_and_install_it._For_more_information_see_http\://www.bouncycastle.org/.=La_libreria_Java_Bouncy_Castle_non_\u00e8_stata_trovata._\u00c8_necessario_scaricarla_ed_installarla._Per_ulteriori_informazioni_vedi_http://www.bouncycastle.org/.

This_feature_generates_a_new_database_based_on_which_entries_are_needed_in_an_existing_LaTeX_document.=Questa_funzione_genera_un_nuovo_database_basato_sulle_voci_necessarie_in_un_documento_LaTeX_esistente.
You_need_to_select_one_of_your_open_databases_from_which_to_choose_entries,_as_well_as_the_AUX_file_produced_by_LaTeX_when_compiling_your_document.=\u00c8_necessario_selezionare_uno_dei_database_aperti_da_cui_scegliere_le_voci,_cos\u00ec_come_il_file_AUX_prodotto_da_LaTeX_nel_compilare_il_documento.
Expand Down
Loading

0 comments on commit 91c8af0

Please sign in to comment.