New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrite bibtexml importer with JAXB parser #1666
Changes from 4 commits
fe63c1a
ef6d2dc
16881b5
6eb09a5
478353e
2919054
1725aca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,22 +17,37 @@ | |
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.lang.reflect.InvocationTargetException; | ||
import java.lang.reflect.Method; | ||
import java.math.BigInteger; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.regex.Pattern; | ||
|
||
import javax.xml.parsers.SAXParser; | ||
import javax.xml.parsers.SAXParserFactory; | ||
import javax.xml.bind.JAXBContext; | ||
import javax.xml.bind.JAXBElement; | ||
import javax.xml.bind.JAXBException; | ||
import javax.xml.bind.Unmarshaller; | ||
import javax.xml.datatype.XMLGregorianCalendar; | ||
import javax.xml.stream.XMLInputFactory; | ||
import javax.xml.stream.XMLStreamException; | ||
import javax.xml.stream.XMLStreamReader; | ||
|
||
import net.sf.jabref.importer.fileformat.bibtexml.Entry; | ||
import net.sf.jabref.importer.fileformat.bibtexml.File; | ||
import net.sf.jabref.importer.fileformat.bibtexml.Inbook; | ||
import net.sf.jabref.importer.fileformat.bibtexml.Incollection; | ||
import net.sf.jabref.logic.importer.ParserResult; | ||
import net.sf.jabref.logic.importer.util.BibTeXMLHandler; | ||
import net.sf.jabref.model.entry.BibEntry; | ||
import net.sf.jabref.model.entry.FieldName; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.xml.sax.InputSource; | ||
|
||
/** | ||
* Importer for the BibTeXML format. | ||
|
@@ -46,6 +61,10 @@ public class BibTeXMLImporter extends ImportFormat { | |
|
||
private static final Pattern START_PATTERN = Pattern.compile("<(bibtex:)?file .*"); | ||
|
||
private static final List<String> IGNORED_METHODS = Arrays.asList("getClass", "getAnnotate", "getContents", | ||
"getPrice", "getSize", "getChapter"); | ||
|
||
|
||
@Override | ||
public String getFormatName() { | ||
return "BibTeXML"; | ||
|
@@ -79,33 +98,184 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException { | |
|
||
List<BibEntry> bibItems = new ArrayList<>(); | ||
|
||
// Obtain a factory object for creating SAX parsers | ||
SAXParserFactory parserFactory = SAXParserFactory.newInstance(); | ||
// Configure the factory object to specify attributes of the parsers it | ||
// creates | ||
// parserFactory.setValidating(true); | ||
parserFactory.setNamespaceAware(true); | ||
// Now create a SAXParser object | ||
|
||
try { | ||
SAXParser parser = parserFactory.newSAXParser(); //May throw exceptions | ||
BibTeXMLHandler handler = new BibTeXMLHandler(); | ||
// Start the parser. It reads the file and calls methods of the handler. | ||
parser.parse(new InputSource(reader), handler); | ||
// When you're done, report the results stored by your handler object | ||
bibItems.addAll(handler.getItems()); | ||
|
||
} catch (javax.xml.parsers.ParserConfigurationException e) { | ||
JAXBContext context = JAXBContext.newInstance("net.sf.jabref.importer.fileformat.bibtexml"); | ||
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); | ||
XMLStreamReader xmlReader = xmlInputFactory.createXMLStreamReader(reader); | ||
|
||
//go to the root element | ||
while (!xmlReader.isStartElement()) { | ||
xmlReader.next(); | ||
} | ||
|
||
Unmarshaller unmarshaller = context.createUnmarshaller(); | ||
File file = (File) unmarshaller.unmarshal(xmlReader); | ||
|
||
List<Entry> entries = file.getEntry(); | ||
Map<String, String> fields = new HashMap<>(); | ||
|
||
for (Entry entry : entries) { | ||
BibEntry bibEntry = new BibEntry(DEFAULT_BIBTEXENTRY_ID); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think, the variable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
if (entry.getArticle() != null) { | ||
bibEntry.setType("article"); | ||
parse(entry.getArticle(), fields); | ||
} | ||
if (entry.getBook() != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah you're right. Changed it to |
||
bibEntry.setType("book"); | ||
parse(entry.getBook(), fields); | ||
} | ||
if (entry.getBooklet() != null) { | ||
bibEntry.setType("booklet"); | ||
parse(entry.getBooklet(), fields); | ||
} | ||
if (entry.getConference() != null) { | ||
bibEntry.setType("conference"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please also cover this case There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
parse(entry.getConference(), fields); | ||
} | ||
if (entry.getInbook() != null) { | ||
bibEntry.setType("inbook"); | ||
parseInbook(entry.getInbook(), fields); | ||
} | ||
if (entry.getIncollection() != null) { | ||
bibEntry.setType("incollection"); | ||
Incollection incollection = entry.getIncollection(); | ||
if (incollection.getChapter() != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please test the other branch (a collection without a chapter), too. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
fields.put(FieldName.CHAPTER, String.valueOf(incollection.getChapter())); | ||
} | ||
parse(incollection, fields); | ||
} | ||
if (entry.getInproceedings() != null) { | ||
bibEntry.setType("inproceedings"); | ||
parse(entry.getInproceedings(), fields); | ||
} | ||
if (entry.getManual() != null) { | ||
bibEntry.setType("manual"); | ||
parse(entry.getManual(), fields); | ||
} | ||
if (entry.getMastersthesis() != null) { | ||
bibEntry.setType("mastersthesis"); | ||
parse(entry.getMastersthesis(), fields); | ||
} | ||
if (entry.getMisc() != null) { | ||
bibEntry.setType("misc"); | ||
parse(entry.getMisc(), fields); | ||
} | ||
if (entry.getPhdthesis() != null) { | ||
bibEntry.setType("phdthesis"); | ||
parse(entry.getPhdthesis(), fields); | ||
} | ||
if (entry.getProceedings() != null) { | ||
bibEntry.setType("proceedings"); | ||
parse(entry.getProceedings(), fields); | ||
} | ||
if (entry.getTechreport() != null) { | ||
bibEntry.setType("techreport"); | ||
parse(entry.getTechreport(), fields); | ||
} | ||
if (entry.getUnpublished() != null) { | ||
bibEntry.setType("unpublished"); | ||
parse(entry.getUnpublished(), fields); | ||
} | ||
if (entry.getId() != null) { | ||
bibEntry.setCiteKey(entry.getId()); | ||
} | ||
bibEntry.setField(fields); | ||
bibItems.add(bibEntry); | ||
} | ||
} catch (JAXBException | XMLStreamException e) { | ||
LOGGER.error("Error with XML parser configuration", e); | ||
return ParserResult.fromErrorMessage(e.getLocalizedMessage()); | ||
} catch (org.xml.sax.SAXException e) { | ||
LOGGER.error("Error during XML parsing", e); | ||
return ParserResult.fromErrorMessage(e.getLocalizedMessage()); | ||
} catch (IOException e) { | ||
LOGGER.error("Error during file import", e); | ||
return ParserResult.fromErrorMessage(e.getLocalizedMessage()); | ||
} | ||
return new ParserResult(bibItems); | ||
} | ||
|
||
/** | ||
* We use a generic method and not work on the real classes, because they all have the same behaviour. They call all get methods | ||
* that are needed and use the return value. So this will prevent writing similar methods for every type. | ||
* <p> | ||
* In this method, all <Code>get</Code> methods that entryType has will be used and their value will be put to fields, | ||
* if it is not null. So for example if entryType has the method <Code>getAbstract</Code>, then | ||
* "abstract" will be put as key to fields and the value of <Code>getAbstract</Code> will be put as value to fields. | ||
* Some <Code>get</Code> methods shouldn't be mapped to fields, so <Code>getClass</Code> for example will be skipped. | ||
* | ||
* @param entryType This can be all possible BibTeX types. It contains all fields of the entry and their values. | ||
* @param fields A map where the name and the value of all fields that the entry contains will be put. | ||
*/ | ||
private <T> void parse(T entryType, Map<String, String> fields) { | ||
Method[] declaredMethods = entryType.getClass().getDeclaredMethods(); | ||
for (Method method : declaredMethods) { | ||
try { | ||
if (method.getName().equals("getYear")) { | ||
putYear(fields, (XMLGregorianCalendar) method.invoke(entryType)); | ||
continue; | ||
} else if (method.getName().equals("getNumber")) { | ||
putNumber(fields, (BigInteger) method.invoke(entryType)); | ||
continue; | ||
} else if (isMethodToIgnore(method.getName())) { | ||
continue; | ||
} else if (method.getName().contains("get")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changed to |
||
putIfValueNotNull(fields, method.getName().replace("get", ""), (String) method.invoke(entryType)); | ||
} | ||
} catch (IllegalArgumentException | InvocationTargetException | IllegalAccessException e) { | ||
LOGGER.error("Could not invoke method", e); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Returns whether the value of the given method name should be mapped or whether the method can be ignored. | ||
* | ||
* @param methodName The name of the method as String | ||
* @return true if the method can be ignored, else false | ||
*/ | ||
private boolean isMethodToIgnore(String methodName) { | ||
return IGNORED_METHODS.contains(methodName); | ||
} | ||
|
||
/** | ||
* Inbook needs a special Treatment, because <Code>inbook.getContent()</Code> returns a list of <Code>JAXBElements</Code>. | ||
* The other types have just <Code>get</Code> methods, which return the values as Strings. | ||
*/ | ||
private void parseInbook(Inbook inbook, Map<String, String> fields) { | ||
List<JAXBElement<?>> content = inbook.getContent(); | ||
for (JAXBElement<?> element : content) { | ||
String localName = element.getName().getLocalPart(); | ||
Object elementValue = element.getValue(); | ||
if (elementValue instanceof String) { | ||
String value = (String) elementValue; | ||
putIfValueNotNull(fields, localName, value); | ||
} else if (elementValue instanceof BigInteger) { | ||
BigInteger value = (BigInteger) elementValue; | ||
if (FieldName.NUMBER.equals(localName)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
putNumber(fields, value); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is that line not covered? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now it should be covered. |
||
} | ||
if (FieldName.CHAPTER.equals(localName) && (value != null)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't be that an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Changed it to |
||
fields.put(FieldName.CHAPTER, String.valueOf(value)); | ||
} | ||
} else if (elementValue instanceof XMLGregorianCalendar) { | ||
XMLGregorianCalendar value = (XMLGregorianCalendar) elementValue; | ||
if (FieldName.YEAR.equals(localName)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if there is another localName used? Maybe, just log it to indicate that is is an unexpected file? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See my comment below. I don't think that this can happen here, because it should be skipped JAXB. |
||
putYear(fields, value); | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't there be an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that this is necessary. If there is an unexpected field, it will not be mapped by JAXB. So if I add here an else, it would be unreachable in my opinion. I've also tested this by adding a field in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add the log nevertheless. In case the XSD is updated, the code might not get updated and the import fail. Having a log (INFO), it is more easy to trace where the code has to be adapted. Please remove this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
} | ||
|
||
private void putYear(Map<String, String> fields, XMLGregorianCalendar year) { | ||
if (year != null) { | ||
fields.put(FieldName.YEAR, String.valueOf(year)); | ||
} | ||
} | ||
|
||
private void putNumber(Map<String, String> fields, BigInteger number) { | ||
if (number != null) { | ||
fields.put(FieldName.NUMBER, String.valueOf(number)); | ||
} | ||
} | ||
|
||
private void putIfValueNotNull(Map<String, String> fields, String key, String value) { | ||
if (value != null) { | ||
fields.put(key, value); | ||
} | ||
} | ||
} |
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why can't directly the reader used as input for the unmarshaller?
The JavaDocs of the unmarshaller say so: https://jaxb.java.net/nonav/2.2.4/docs/api/javax/xml/bind/Unmarshaller.html
Reading that, the code would be much shorter and especially lines 106 to 109 would be obsolete
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. I've used directly the reader as input.