New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rewrite bibtexml importer with JAXB parser #1666
Changes from 1 commit
fe63c1a
ef6d2dc
16881b5
6eb09a5
478353e
2919054
1725aca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,21 +17,37 @@ | |
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.lang.reflect.InvocationTargetException; | ||
import java.lang.reflect.Method; | ||
import java.math.BigInteger; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.regex.Pattern; | ||
|
||
import javax.xml.parsers.SAXParser; | ||
import javax.xml.parsers.SAXParserFactory; | ||
import javax.xml.bind.JAXBContext; | ||
import javax.xml.bind.JAXBElement; | ||
import javax.xml.bind.JAXBException; | ||
import javax.xml.bind.Unmarshaller; | ||
import javax.xml.datatype.XMLGregorianCalendar; | ||
import javax.xml.stream.XMLInputFactory; | ||
import javax.xml.stream.XMLStreamException; | ||
import javax.xml.stream.XMLStreamReader; | ||
|
||
import net.sf.jabref.importer.ParserResult; | ||
import net.sf.jabref.importer.fileformat.bibtexml.Entry; | ||
import net.sf.jabref.importer.fileformat.bibtexml.File; | ||
import net.sf.jabref.importer.fileformat.bibtexml.Inbook; | ||
import net.sf.jabref.importer.fileformat.bibtexml.Incollection; | ||
import net.sf.jabref.model.entry.BibEntry; | ||
import net.sf.jabref.model.entry.FieldName; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
import org.xml.sax.InputSource; | ||
|
||
/** | ||
* Importer for the BibTeXML format. | ||
|
@@ -45,6 +61,10 @@ public class BibTeXMLImporter extends ImportFormat { | |
|
||
private static final Pattern START_PATTERN = Pattern.compile("<(bibtex:)?file .*"); | ||
|
||
private static final List<String> FIELDS_TO_SKIP = Arrays.asList("getClass", "getAnnotate", "getContents", | ||
"getPrice", | ||
"getSize", "getChapter"); | ||
|
||
@Override | ||
public String getFormatName() { | ||
return "BibTeXML"; | ||
|
@@ -78,33 +98,177 @@ public ParserResult importDatabase(BufferedReader reader) throws IOException { | |
|
||
List<BibEntry> bibItems = new ArrayList<>(); | ||
|
||
// Obtain a factory object for creating SAX parsers | ||
SAXParserFactory parserFactory = SAXParserFactory.newInstance(); | ||
// Configure the factory object to specify attributes of the parsers it | ||
// creates | ||
// parserFactory.setValidating(true); | ||
parserFactory.setNamespaceAware(true); | ||
// Now create a SAXParser object | ||
|
||
try { | ||
SAXParser parser = parserFactory.newSAXParser(); //May throw exceptions | ||
BibTeXMLHandler handler = new BibTeXMLHandler(); | ||
// Start the parser. It reads the file and calls methods of the handler. | ||
parser.parse(new InputSource(reader), handler); | ||
// When you're done, report the results stored by your handler object | ||
bibItems.addAll(handler.getItems()); | ||
|
||
} catch (javax.xml.parsers.ParserConfigurationException e) { | ||
JAXBContext context = JAXBContext.newInstance("net.sf.jabref.importer.fileformat.bibtexml"); | ||
XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory(); | ||
XMLStreamReader xmlReader = xmlInputFactory.createXMLStreamReader(reader); | ||
|
||
//go to the root element | ||
while (!xmlReader.isStartElement()) { | ||
xmlReader.next(); | ||
} | ||
|
||
Unmarshaller unmarshaller = context.createUnmarshaller(); | ||
File file = (File) unmarshaller.unmarshal(xmlReader); | ||
|
||
List<Entry> entries = file.getEntry(); | ||
Map<String, String> fields = new HashMap<>(); | ||
|
||
for (Entry entry : entries) { | ||
BibEntry bibEntry = new BibEntry(DEFAULT_BIBTEXENTRY_ID); | ||
if (entry.getArticle() != null) { | ||
bibEntry.setType("article"); | ||
parse(entry.getArticle(), fields); | ||
} | ||
if (entry.getBook() != null) { | ||
bibEntry.setType("book"); | ||
parse(entry.getBook(), fields); | ||
} | ||
if (entry.getBooklet() != null) { | ||
bibEntry.setType("booklet"); | ||
parse(entry.getBooklet(), fields); | ||
} | ||
if (entry.getConference() != null) { | ||
bibEntry.setType("conference"); | ||
parse(entry.getConference(), fields); | ||
} | ||
if (entry.getInbook() != null) { | ||
bibEntry.setType("inbook"); | ||
parseInbook(entry.getInbook(), fields); | ||
} | ||
if (entry.getIncollection() != null) { | ||
bibEntry.setType("incollection"); | ||
Incollection incollection = entry.getIncollection(); | ||
if (incollection.getChapter() != null) { | ||
fields.put(FieldName.CHAPTER, String.valueOf(incollection.getChapter())); | ||
} | ||
parse(incollection, fields); | ||
} | ||
if (entry.getInproceedings() != null) { | ||
bibEntry.setType("inproceedings"); | ||
parse(entry.getInproceedings(), fields); | ||
} | ||
if (entry.getManual() != null) { | ||
bibEntry.setType("manual"); | ||
parse(entry.getManual(), fields); | ||
} | ||
if (entry.getMastersthesis() != null) { | ||
bibEntry.setType("mastersthesis"); | ||
parse(entry.getMastersthesis(), fields); | ||
} | ||
if (entry.getMisc() != null) { | ||
bibEntry.setType("misc"); | ||
parse(entry.getMisc(), fields); | ||
} | ||
if (entry.getPhdthesis() != null) { | ||
bibEntry.setType("phdthesis"); | ||
parse(entry.getPhdthesis(), fields); | ||
} | ||
if (entry.getProceedings() != null) { | ||
bibEntry.setType("proceedings"); | ||
parse(entry.getProceedings(), fields); | ||
} | ||
if (entry.getTechreport() != null) { | ||
bibEntry.setType("techreport"); | ||
parse(entry.getTechreport(), fields); | ||
} | ||
if (entry.getUnpublished() != null) { | ||
bibEntry.setType("unpublished"); | ||
parse(entry.getUnpublished(), fields); | ||
} | ||
if (entry.getId() != null) { | ||
bibEntry.setCiteKey(entry.getId()); | ||
} | ||
bibEntry.setField(fields); | ||
bibItems.add(bibEntry); | ||
} | ||
} catch (JAXBException | XMLStreamException e) { | ||
LOGGER.error("Error with XML parser configuration", e); | ||
return ParserResult.fromErrorMessage(e.getLocalizedMessage()); | ||
} catch (org.xml.sax.SAXException e) { | ||
LOGGER.error("Error during XML parsing", e); | ||
return ParserResult.fromErrorMessage(e.getLocalizedMessage()); | ||
} catch (IOException e) { | ||
LOGGER.error("Error during file import", e); | ||
return ParserResult.fromErrorMessage(e.getLocalizedMessage()); | ||
} | ||
return new ParserResult(bibItems); | ||
} | ||
|
||
/** | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add some more explanation of why you chose this implementation approach instead of working on the real classes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done. |
||
* In this method, all <Code>get</Code> methods that entryType has will be used and their value will be put into fields, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
* if it is not null. So for example if entryType has the method <Code>getAbstract</Code>, then | ||
* "abstract" will be put as key to fields and the value of <Code>getAbstract</Code> will be put as value to fields. | ||
* Some <Code>get</Code> methods shouldn't be mapped to fields, so <Code>getClass</Code> for example will be skipped. | ||
* | ||
* @param entryType This can be all possible BibTeX types. It contains all fields of the entry and their values. | ||
* @param fields A map where the name and the value of all fields, that the entry contains, will be put. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Remove commas. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done. |
||
*/ | ||
private <T> void parse(T entryType, Map<String, String> fields) { | ||
Method[] declaredMethods = entryType.getClass().getDeclaredMethods(); | ||
for (Method method : declaredMethods) { | ||
try { | ||
if (method.getName().equals("getYear")) { | ||
putYear(fields, (XMLGregorianCalendar) method.invoke(entryType)); | ||
continue; | ||
} else if (method.getName().equals("getNumber")) { | ||
putNumber(fields, (BigInteger) method.invoke(entryType)); | ||
continue; | ||
} else if (isFieldToSkip(method.getName())) { | ||
continue; | ||
} else if (method.getName().contains("get")) { | ||
putIfValueNotNull(fields, method.getName().replace("get", ""), (String) method.invoke(entryType)); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please append this to the first if statement with an |
||
} catch (IllegalArgumentException | InvocationTargetException | IllegalAccessException e) { | ||
LOGGER.error("Could not invoke method", e); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Returns whether the value of the given method name should be mapped or whether the field can be skipped. | ||
* | ||
* @param name of a method | ||
* @return true if the field can be skipped, else false | ||
*/ | ||
private boolean isFieldToSkip(String name) { | ||
return FIELDS_TO_SKIP.contains(name); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Either remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
} | ||
|
||
private void parseInbook(Inbook inbook, Map<String, String> fields) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need a special Treatment for Inbook, but not for the others? Please provide JavaDoc comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
List<JAXBElement<?>> content = inbook.getContent(); | ||
for (JAXBElement<?> element : content) { | ||
String localName = element.getName().getLocalPart(); | ||
Object elementValue = element.getValue(); | ||
if (elementValue instanceof String) { | ||
String value = (String) elementValue; | ||
putIfValueNotNull(fields, localName, value); | ||
} else if (elementValue instanceof BigInteger) { | ||
BigInteger value = (BigInteger) elementValue; | ||
if (FieldName.NUMBER.equals(localName)) { | ||
putNumber(fields, value); | ||
} | ||
if (FieldName.CHAPTER.equals(localName) && (value != null)) { | ||
fields.put(FieldName.CHAPTER, String.valueOf(value)); | ||
} | ||
} else if (elementValue instanceof XMLGregorianCalendar) { | ||
XMLGregorianCalendar value = (XMLGregorianCalendar) elementValue; | ||
if (FieldName.YEAR.equals(localName)) { | ||
putYear(fields, value); | ||
} | ||
} | ||
} | ||
} | ||
|
||
private void putYear(Map<String, String> fields, XMLGregorianCalendar year) { | ||
if (year != null) { | ||
fields.put(FieldName.YEAR, String.valueOf(year)); | ||
} | ||
} | ||
|
||
private void putNumber(Map<String, String> fields, BigInteger number) { | ||
if (number != null) { | ||
fields.put(FieldName.NUMBER, String.valueOf(number)); | ||
} | ||
} | ||
|
||
private void putIfValueNotNull(Map<String, String> fields, String key, String value) { | ||
if (value != null) { | ||
fields.put(key, value); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This if condition shouldn't be necessary as the xsd schema file specifies the attribute as
use="required"