Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite bibtexml importer with JAXB parser #1666

Merged
merged 7 commits into from Aug 18, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.gradle
Expand Up @@ -32,7 +32,7 @@ apply plugin: 'checkstyle'

apply from: 'eclipse.gradle'
apply from: 'localization.gradle'
apply from: 'medline.gradle'
apply from: 'xjc.gradle'

group = "net.sf.jabref"
version = "3.6dev"
Expand Down
107 changes: 0 additions & 107 deletions src/main/java/net/sf/jabref/importer/fileformat/BibTeXMLHandler.java

This file was deleted.

214 changes: 189 additions & 25 deletions src/main/java/net/sf/jabref/importer/fileformat/BibTeXMLImporter.java
Expand Up @@ -17,21 +17,37 @@

import java.io.BufferedReader;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Unmarshaller;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

import net.sf.jabref.importer.ParserResult;
import net.sf.jabref.importer.fileformat.bibtexml.Entry;
import net.sf.jabref.importer.fileformat.bibtexml.File;
import net.sf.jabref.importer.fileformat.bibtexml.Inbook;
import net.sf.jabref.importer.fileformat.bibtexml.Incollection;
import net.sf.jabref.model.entry.BibEntry;
import net.sf.jabref.model.entry.FieldName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.xml.sax.InputSource;

/**
* Importer for the BibTeXML format.
Expand All @@ -45,6 +61,10 @@ public class BibTeXMLImporter extends ImportFormat {

private static final Pattern START_PATTERN = Pattern.compile("<(bibtex:)?file .*");

// Names of getter methods whose values parse(...) must not copy generically into the field map.
// The list is consulted through isFieldToSkip(String), which compares the complete method name.
private static final List<String> FIELDS_TO_SKIP = Arrays.asList("getClass", "getAnnotate", "getContents",
"getPrice",
"getSize", "getChapter");

@Override
public String getFormatName() {
return "BibTeXML";
Expand Down Expand Up @@ -78,33 +98,177 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException {

List<BibEntry> bibItems = new ArrayList<>();

// Obtain a factory object for creating SAX parsers
SAXParserFactory parserFactory = SAXParserFactory.newInstance();
// Configure the factory object to specify attributes of the parsers it
// creates
// parserFactory.setValidating(true);
parserFactory.setNamespaceAware(true);
// Now create a SAXParser object

try {
SAXParser parser = parserFactory.newSAXParser(); //May throw exceptions
BibTeXMLHandler handler = new BibTeXMLHandler();
// Start the parser. It reads the file and calls methods of the handler.
parser.parse(new InputSource(reader), handler);
// When you're done, report the results stored by your handler object
bibItems.addAll(handler.getItems());

} catch (javax.xml.parsers.ParserConfigurationException e) {
JAXBContext context = JAXBContext.newInstance("net.sf.jabref.importer.fileformat.bibtexml");
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
XMLStreamReader xmlReader = xmlInputFactory.createXMLStreamReader(reader);

//go to the root element
while (!xmlReader.isStartElement()) {
xmlReader.next();
}

Unmarshaller unmarshaller = context.createUnmarshaller();
File file = (File) unmarshaller.unmarshal(xmlReader);

List<Entry> entries = file.getEntry();
Map<String, String> fields = new HashMap<>();

for (Entry entry : entries) {
BibEntry bibEntry = new BibEntry(DEFAULT_BIBTEXENTRY_ID);
if (entry.getArticle() != null) {
bibEntry.setType("article");
parse(entry.getArticle(), fields);
}
if (entry.getBook() != null) {
bibEntry.setType("book");
parse(entry.getBook(), fields);
}
if (entry.getBooklet() != null) {
bibEntry.setType("booklet");
parse(entry.getBooklet(), fields);
}
if (entry.getConference() != null) {
bibEntry.setType("conference");
parse(entry.getConference(), fields);
}
if (entry.getInbook() != null) {
bibEntry.setType("inbook");
parseInbook(entry.getInbook(), fields);
}
if (entry.getIncollection() != null) {
bibEntry.setType("incollection");
Incollection incollection = entry.getIncollection();
if (incollection.getChapter() != null) {
fields.put(FieldName.CHAPTER, String.valueOf(incollection.getChapter()));
}
parse(incollection, fields);
}
if (entry.getInproceedings() != null) {
bibEntry.setType("inproceedings");
parse(entry.getInproceedings(), fields);
}
if (entry.getManual() != null) {
bibEntry.setType("manual");
parse(entry.getManual(), fields);
}
if (entry.getMastersthesis() != null) {
bibEntry.setType("mastersthesis");
parse(entry.getMastersthesis(), fields);
}
if (entry.getMisc() != null) {
bibEntry.setType("misc");
parse(entry.getMisc(), fields);
}
if (entry.getPhdthesis() != null) {
bibEntry.setType("phdthesis");
parse(entry.getPhdthesis(), fields);
}
if (entry.getProceedings() != null) {
bibEntry.setType("proceedings");
parse(entry.getProceedings(), fields);
}
if (entry.getTechreport() != null) {
bibEntry.setType("techreport");
parse(entry.getTechreport(), fields);
}
if (entry.getUnpublished() != null) {
bibEntry.setType("unpublished");
parse(entry.getUnpublished(), fields);
}
if (entry.getId() != null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This if condition shouldn't be necessary as the xsd schema file specifies the attribute as use="required"

bibEntry.setCiteKey(entry.getId());
}
bibEntry.setField(fields);
bibItems.add(bibEntry);
}
} catch (JAXBException | XMLStreamException e) {
LOGGER.error("Error with XML parser configuration", e);
return ParserResult.fromErrorMessage(e.getLocalizedMessage());
} catch (org.xml.sax.SAXException e) {
LOGGER.error("Error during XML parsing", e);
return ParserResult.fromErrorMessage(e.getLocalizedMessage());
} catch (IOException e) {
LOGGER.error("Error during file import", e);
return ParserResult.fromErrorMessage(e.getLocalizedMessage());
}
return new ParserResult(bibItems);
}

/**
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add some more explanation why you choose that implementation method and not working on the real classes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

* In this method, all <Code>get</Code> methods that entryType has will be used and their value will be put to fields,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

t -> entryType

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

* if it is not null. So for example if entryType has the method <Code>getAbstract</Code>, then
* "abstract" will be put as key to fields and the value of <Code>getAbstract</Code> will be put as value to fields.
* Some <Code>get</Code> methods shouldn't be mapped to fields, so <Code>getClass</Code> for example will be skipped.
*
* @param entryType This can be all possible BibTeX types. It contains all fields of the entry and their values.
* @param fields A map where the name and the value of all fields that the entry contains will be put.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove commas

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

*/
private <T> void parse(T entryType, Map<String, String> fields) {
// Only the methods declared directly on the runtime class of entryType are inspected.
Method[] declaredMethods = entryType.getClass().getDeclaredMethods();
for (Method method : declaredMethods) {
try {
// getYear and getNumber are handled separately: their results are cast to
// XMLGregorianCalendar and BigInteger and converted by putYear / putNumber.
if (method.getName().equals("getYear")) {
putYear(fields, (XMLGregorianCalendar) method.invoke(entryType));
continue;
} else if (method.getName().equals("getNumber")) {
putNumber(fields, (BigInteger) method.invoke(entryType));
continue;
} else if (isFieldToSkip(method.getName())) {
continue;
// NOTE(review): contains("get") and replace("get", "") match "get" anywhere in the method name,
// not only as a prefix, and the resulting key keeps the getter's capitalisation (getAbstract ->
// "Abstract"). The (String) cast throws a ClassCastException, which is not caught below, if a
// remaining getter returns something other than a String - confirm against the generated classes.
} else if (method.getName().contains("get")) {
putIfValueNotNull(fields, method.getName().replace("get", ""), (String) method.invoke(entryType));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please append this to the first if statement with an else if

} catch (IllegalArgumentException | InvocationTargetException | IllegalAccessException e) {
// A failing getter is logged and the loop continues with the next method.
LOGGER.error("Could not invoke method", e);
}
}
}

/**
* Returns whether the value of the given method name should be mapped or whether the field can be skipped.
* The name is looked up in <Code>FIELDS_TO_SKIP</Code> with an exact, case-sensitive match, so it has to be the
* complete getter name (for example <Code>getClass</Code>) and not the bare field name.
*
* @param name of a method
* @return true if the field can be skipped, else false
*/
private boolean isFieldToSkip(String name) {
return FIELDS_TO_SKIP.contains(name);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either remove the get prefix and check for ignored case or name this "ignoredMethods" etc. --> getSize is not a bibtex field.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

}

/**
* Copies the fields of an <Code>inbook</Code> entry into the given map.
* The values are read from the list of <Code>JAXBElement</Code>s returned by <Code>Inbook.getContent()</Code>:
* the local name of each element is used to decide where the value goes, and the value is handled according to
* its runtime type (String, BigInteger or XMLGregorianCalendar). Elements of any other type are ignored.
*
* @param inbook the inbook entry whose content is read
* @param fields map that receives the field names and values found in the entry
*/
private void parseInbook(Inbook inbook, Map<String, String> fields) {
Copy link
Member

@koppor koppor Aug 12, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need a special Treatment for Inbook, but not for the others?

Please provide JavaDoc comment

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

List<JAXBElement<?>> content = inbook.getContent();
for (JAXBElement<?> element : content) {
String localName = element.getName().getLocalPart();
Object elementValue = element.getValue();
// Text values are stored under the element's local name as-is.
if (elementValue instanceof String) {
String value = (String) elementValue;
putIfValueNotNull(fields, localName, value);
// Numeric values are kept only when the element is named FieldName.NUMBER or FieldName.CHAPTER.
} else if (elementValue instanceof BigInteger) {
BigInteger value = (BigInteger) elementValue;
if (FieldName.NUMBER.equals(localName)) {
putNumber(fields, value);
}
// The null check is redundant here: instanceof has already excluded null.
if (FieldName.CHAPTER.equals(localName) && (value != null)) {
fields.put(FieldName.CHAPTER, String.valueOf(value));
}
// Calendar values are kept only when the element is named FieldName.YEAR.
} else if (elementValue instanceof XMLGregorianCalendar) {
XMLGregorianCalendar value = (XMLGregorianCalendar) elementValue;
if (FieldName.YEAR.equals(localName)) {
putYear(fields, value);
}
}
}
}

/**
 * Stores the textual form of the year under <Code>FieldName.YEAR</Code>.
 * When no year is given the map is left untouched.
 *
 * @param fields map that receives the year
 * @param year   year of the entry, may be null
 */
private void putYear(Map<String, String> fields, XMLGregorianCalendar year) {
    if (year == null) {
        return;
    }
    fields.put(FieldName.YEAR, year.toString());
}

/**
 * Stores the decimal representation of the number under <Code>FieldName.NUMBER</Code>.
 * When no number is given the map is left untouched.
 *
 * @param fields map that receives the number
 * @param number number of the entry, may be null
 */
private void putNumber(Map<String, String> fields, BigInteger number) {
    if (number == null) {
        return;
    }
    fields.put(FieldName.NUMBER, number.toString());
}

/**
 * Adds the key/value pair to the map unless the value is missing.
 * An existing mapping for the key is overwritten.
 *
 * @param fields map that receives the pair
 * @param key    name of the field
 * @param value  value of the field, may be null in which case nothing is stored
 */
private void putIfValueNotNull(Map<String, String> fields, String key, String value) {
    if (value == null) {
        return;
    }
    fields.put(key, value);
}
}