From da3c1c3b670062e1ca4089c8add6bf4a5fca3762 Mon Sep 17 00:00:00 2001
From: Stefan Kolb
Date: Thu, 3 Mar 2016 21:33:51 +0100
Subject: [PATCH] Simplify parsing logic

---
 .../wizard/auximport/AuxFileParser.java       | 213 +++++++-----------
 .../wizard/auximport/AuxFileParserTest.java   |   1 +
 2 files changed, 77 insertions(+), 137 deletions(-)

diff --git a/src/main/java/net/sf/jabref/wizard/auximport/AuxFileParser.java b/src/main/java/net/sf/jabref/wizard/auximport/AuxFileParser.java
index 9f7505edffc..fa40d40d677 100644
--- a/src/main/java/net/sf/jabref/wizard/auximport/AuxFileParser.java
+++ b/src/main/java/net/sf/jabref/wizard/auximport/AuxFileParser.java
@@ -9,6 +9,8 @@ import org.apache.commons.logging.LogFactory;

 import java.io.*;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@@ -21,21 +23,20 @@ public class AuxFileParser {

     private static final Log LOGGER = LogFactory.getLog(AuxFileParser.class);

-    private static final Pattern TAG_PATTERN = Pattern.compile("\\\\(citation|abx@aux@cite)\\{(.+)\\}");
-
     private BibDatabase masterDatabase;
-    private BibDatabase auxDatabase;
+    private BibDatabase auxDatabase;

     private final Set<String> uniqueKeys = new HashSet<>();
-    private final List<String> unresolvedKeys = new ArrayList<>();
+    private final List<String> unresolvedKeys = new ArrayList<>();

     private int nestedAuxCount;
+
     private int crossRefEntriesCount;

     /**
      * Generates a database based on the given aux file and BibTeX database
      *
-     * @param auxFile Path to the LaTeX aux file
+     * @param auxFile Path to the LaTeX aux file
      * @param database BibTeX database
      */
     public AuxFileParser(String auxFile, BibDatabase database) {
@@ -101,135 +102,71 @@ public String getInformation(boolean includeMissingEntries) {
         return result.toString();
     }

+    private static final Pattern CITE_PATTERN = Pattern.compile("\\\\(citation|abx@aux@cite)\\{(.+)\\}");
+    private static final Pattern INPUT_PATTERN = Pattern.compile("\\\\@input\\{(.+)\\}");
+
     /*
-     * parseAuxFile read the Aux file and fill up some intern data structures. Nested aux files (latex \\include)
-     * supported!
+     * Parses the aux file and extracts all bib keys.
+     * Also supports nested aux files (latex \\include).
+     *
+     * There exists no specification of the aux file.
+     * Every package, class or document can write to the aux file.
+     * The aux file consists of LaTeX macros and is read at the \begin{document} and again at the \end{document}.
      *
-     * // found at comp.text.tex
-    // > Can anyone tell be the information held within a .aux file? Is there a
-    // > specific format to this file?
-    //
-    // I don't think there is a particular format. Every package, class
-    // or document can write to the aux file. The aux file consists of LaTeX macros
-    // and is read at the \begin{document} and again at the \end{document}.
-    //
-    // It usually contains information about existing labels
-    // \\newlabel{sec:Intro}{{1}{1}}
-    // and citations
-    // \citation{hiri:conv:1993}
-    // and macros to write information to other files (like toc, lof or lot files)
-    // \@writefile{toc}{\contentsline {section}{\numberline
-    // {1}Intro}{1}}
-    // but as I said, there can be a lot more
-
-    // aux file :
-    //
-    // \\citation{x} x = used reference of bibtex library entry
-    //
-    // \\@input{x} x = nested aux file
-    //
-    // the \\bibdata{x} directive contains information about the
-    // bibtex library file -> x = name of bib file
-    //
-    // \\bibcite{x}{y}
-    // x is a label for an item and y is the index in bibliography
-     * @param filename String : Path to LatexAuxFile
-     * @return boolean, true = no error occurs
+     * BibTeX citation: \citation{x,y,z}
+     * Biblatex citation: \abx@aux@cite{x,y,z}
+     * Nested aux files: \@input{x}
      */
-    private boolean parseAuxFile(String filename) {
-        // regular expressions
-        Matcher matcher;
-
-        // file list, used for nested aux files
-        List<String> fileList = new ArrayList<>(5);
-        fileList.add(filename);
-
-        // get the file path
-        File dummy = new File(filename);
-        String path = dummy.getParent();
-        if (path == null) {
-            path = "";
-        } else {
-            path = path + File.separator;
-        }
+    private void parseAuxFile(String filename) {
+        // work list of aux files; must be growable because nested aux files are appended while iterating
+        List<String> fileList = new ArrayList<>(Arrays.asList(filename));

-        nestedAuxCount = -1; // count only the nested reads
-
-        // index of current file in list
         int fileIndex = 0;
-        // while condition
-        boolean cont;

         while (fileIndex < fileList.size()) {
-            String fName = fileList.get(fileIndex);
-            try (BufferedReader br = new BufferedReader(new FileReader(fName))) {
-                cont = true;
-
-                while (cont) {
-                    Optional<String> maybeLine;
-                    try {
-                        maybeLine = Optional.ofNullable(br.readLine());
-                    } catch (IOException ioe) {
-                        maybeLine = Optional.empty();
-                    }
+            String file = fileList.get(fileIndex);
+
+            try (BufferedReader br = new BufferedReader(new FileReader(file))) {
+                String line;

-                    if (maybeLine.isPresent()) {
-                        String line = maybeLine.get();
-                        matcher = TAG_PATTERN.matcher(line);
-
-                        while (matcher.find()) {
-                            // extract the bibtex-key(s) XXX from \citation{XXX} string
-                            int len = matcher.end() - matcher.start();
-                            if (len > 11) {
-                                String str = matcher.group(2);
-                                // could be an comma separated list of keys
-                                String[] keys = str.split(",");
-                                for (String dummyStr : keys) {
-                                    // delete all unnecessary blanks and save key into an set
-                                    uniqueKeys.add(dummyStr.trim());
-                                }
-                            }
+                while ((line = br.readLine()) != null) {
+                    Matcher citeMatch = CITE_PATTERN.matcher(line);
+
+                    while (citeMatch.find()) {
+                        String keyString = citeMatch.group(2);
+                        String[] keys = keyString.split(",");
+
+                        for (String key : keys) {
+                            uniqueKeys.add(key.trim());
                         }
-                        // try to find a nested aux file
-                        int index = line.indexOf("\\@input{");
-                        if (index >= 0) {
-                            int start = index + 8;
-                            int end = line.indexOf('}', start);
-                            if (end > start) {
-                                String str = path + line.substring(index + 8, end);
-
-                                // if filename already in file list
-                                if (!fileList.contains(str)) {
-                                    fileList.add(str); // insert file into file list
-                                }
-                            }
+                    }
+
+                    Matcher inputMatch = INPUT_PATTERN.matcher(line);
+                    // nested aux files are resolved against the directory that contains the root aux file
+                    while (inputMatch.find()) {
+                        String inputString = inputMatch.group(1); // the pattern's only group: the nested file name
+                        String inputFile = new File(filename).toPath().resolveSibling(inputString).toString();
+
+                        if (!fileList.contains(inputFile)) {
+                            fileList.add(inputFile);
+                            nestedAuxCount++;
                         }
-                    } else {
-                        cont = false;
                     }
                 }
-                nestedAuxCount++;
             } catch (FileNotFoundException e) {
-                LOGGER.info("Cannot locate input file!", e);
+                LOGGER.info("Cannot locate input file", e);
             } catch (IOException e) {
-                LOGGER.warn("Problem opening file!", e);
+                LOGGER.warn("Problem opening file", e);
             }

-            fileIndex++; // load next file
+            fileIndex++;
         }
-
-        return true;
     }

     /*
      * Try to find an equivalent BibTeX entry inside the reference database for all keys inside the aux file.
      */
     private void resolveTags() {
-        auxDatabase = new BibDatabase();
-        unresolvedKeys.clear();
-
-        // for all bibtex keys (found in aux-file) try to find an equivalent
-        // entry into reference database
         for (String key : uniqueKeys) {
             BibEntry entry = masterDatabase.getEntryByKey(key);

@@ -237,22 +174,7 @@ private void resolveTags() {
                 unresolvedKeys.add(key);
             } else {
                 insertEntry(entry);
-                // Check if the entry we just found references another entry which
-                // we don't already have in our list of entries to include. If so,
-                // pull in that entry as well:
-                entry.getFieldOptional("crossref").ifPresent(crossref -> {
-                    if (!uniqueKeys.contains(crossref)) {
-                        BibEntry refEntry = masterDatabase.getEntryByKey(crossref);
-
-                        if (refEntry == null) {
-                            unresolvedKeys.add(crossref);
-                        } else {
-                            insertEntry(refEntry);
-                            crossRefEntriesCount++;
-                        }
-                    }
-                });
-
+                resolveCrossReferences(entry);
             }
         }

@@ -262,24 +184,41 @@ private void resolveTags() {
     }

     /*
-     * Copy the database's configuration, i.e., preamble and strings.
+     * Resolves and adds CrossRef entries
      */
-    private void copyDatabaseConfiguration() {
-        auxDatabase.setPreamble(masterDatabase.getPreamble());
-        Set<String> keys = masterDatabase.getStringKeySet();
-        for (String key : keys) {
-            BibtexString string = masterDatabase.getString(key);
-            auxDatabase.addString(string);
-        }
+    private void resolveCrossReferences(BibEntry entry) {
+        entry.getFieldOptional("crossref").ifPresent(crossref -> {
+            if (!uniqueKeys.contains(crossref)) {
+                BibEntry refEntry = masterDatabase.getEntryByKey(crossref);
+
+                if (refEntry == null) {
+                    unresolvedKeys.add(crossref);
+                } else {
+                    insertEntry(refEntry);
+                    crossRefEntriesCount++;
+                }
+            }
+        });
     }

     /*
-     * Insert a clone of the given entry.
-     * The clone is given a new unique ID.
+     * Insert a clone of the given entry. The clone is given a new unique ID.
      */
     private void insertEntry(BibEntry entry) {
         BibEntry clonedEntry = (BibEntry) entry.clone();
         clonedEntry.setId(IdGenerator.next());
         auxDatabase.insertEntry(clonedEntry);
     }
+
+    /*
+     * Copy the database's configuration, i.e., preamble and strings.
+     */
+    private void copyDatabaseConfiguration() {
+        auxDatabase.setPreamble(masterDatabase.getPreamble());
+        Set<String> keys = masterDatabase.getStringKeySet();
+        for (String key : keys) {
+            BibtexString string = masterDatabase.getString(key);
+            auxDatabase.addString(string);
+        }
+    }
 }
diff --git a/src/test/java/net/sf/jabref/wizard/auximport/AuxFileParserTest.java b/src/test/java/net/sf/jabref/wizard/auximport/AuxFileParserTest.java
index ebcfc206af0..bca94ce780a 100644
--- a/src/test/java/net/sf/jabref/wizard/auximport/AuxFileParserTest.java
+++ b/src/test/java/net/sf/jabref/wizard/auximport/AuxFileParserTest.java
@@ -70,4 +70,5 @@ public void testNotAllFound() {

     // TODO strings and preamble test
     // TODO return type of generate during error should be false
+    // TODO resolve nested aux files
 }