Skip to content

Commit

Permalink
Simplify parsing logic
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-kolb committed Mar 3, 2016
1 parent d13ae87 commit da3c1c3
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 137 deletions.
213 changes: 76 additions & 137 deletions src/main/java/net/sf/jabref/wizard/auximport/AuxFileParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import org.apache.commons.logging.LogFactory;

import java.io.*;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -21,21 +23,20 @@
public class AuxFileParser {
private static final Log LOGGER = LogFactory.getLog(AuxFileParser.class);

private static final Pattern TAG_PATTERN = Pattern.compile("\\\\(citation|abx@aux@cite)\\{(.+)\\}");

private BibDatabase masterDatabase;
private BibDatabase auxDatabase;

private BibDatabase auxDatabase;
private final Set<String> uniqueKeys = new HashSet<>();
private final List<String> unresolvedKeys = new ArrayList<>();

private final List<String> unresolvedKeys = new ArrayList<>();
private int nestedAuxCount;

private int crossRefEntriesCount;

/**
* Generates a database based on the given aux file and BibTeX database
*
* @param auxFile Path to the LaTeX aux file
* @param auxFile Path to the LaTeX aux file
* @param database BibTeX database
*/
public AuxFileParser(String auxFile, BibDatabase database) {
Expand Down Expand Up @@ -101,158 +102,79 @@ public String getInformation(boolean includeMissingEntries) {
return result.toString();
}

private static final Pattern CITE_PATTERN = Pattern.compile("\\\\(citation|abx@aux@cite)\\{(.+)\\}");
private static final Pattern INPUT_PATTERN = Pattern.compile("\\\\@input\\{(.+)\\}");

/*
* parseAuxFile read the Aux file and fill up some intern data structures. Nested aux files (latex \\include)
* supported!
* Parses the aux file and extracts all bib keys.
* Also supports nested aux files (latex \\include).
*
* There exists no specification of the aux file.
* Every package, class or document can write to the aux file.
* The aux file consists of LaTeX macros and is read at the \begin{document} and again at the \end{document}.
*
* // found at comp.text.tex
// > Can anyone tell be the information held within a .aux file? Is there a
// > specific format to this file?
//
// I don't think there is a particular format. Every package, class
// or document can write to the aux file. The aux file consists of LaTeX macros
// and is read at the \begin{document} and again at the \end{document}.
//
// It usually contains information about existing labels
// \\newlabel{sec:Intro}{{1}{1}}
// and citations
// \citation{hiri:conv:1993}
// and macros to write information to other files (like toc, lof or lot files)
// \@writefile{toc}{\contentsline {section}{\numberline
// {1}Intro}{1}}
// but as I said, there can be a lot more
// aux file :
//
// \\citation{x} x = used reference of bibtex library entry
//
// \\@input{x} x = nested aux file
//
// the \\bibdata{x} directive contains information about the
// bibtex library file -> x = name of bib file
//
// \\bibcite{x}{y}
// x is a label for an item and y is the index in bibliography
* @param filename String : Path to LatexAuxFile
* @return boolean, true = no error occurs
* BibTeX citation: \citation{x,y,z}
* Biblatex citation: \abx@aux@cite{x,y,z}
* Nested aux files: \@input{x}
*/
private boolean parseAuxFile(String filename) {
// regular expressions
Matcher matcher;

// file list, used for nested aux files
List<String> fileList = new ArrayList<>(5);
fileList.add(filename);

// get the file path
File dummy = new File(filename);
String path = dummy.getParent();
if (path == null) {
path = "";
} else {
path = path + File.separator;
}
private void parseAuxFile(String filename) {
// nested aux files
List<String> fileList = Arrays.asList(filename);

nestedAuxCount = -1; // count only the nested reads

// index of current file in list
int fileIndex = 0;

// while condition
boolean cont;
while (fileIndex < fileList.size()) {
String fName = fileList.get(fileIndex);
try (BufferedReader br = new BufferedReader(new FileReader(fName))) {
cont = true;

while (cont) {
Optional<String> maybeLine;
try {
maybeLine = Optional.ofNullable(br.readLine());
} catch (IOException ioe) {
maybeLine = Optional.empty();
}
String file = fileList.get(fileIndex);

try (BufferedReader br = new BufferedReader(new FileReader(file))) {
String line;

if (maybeLine.isPresent()) {
String line = maybeLine.get();
matcher = TAG_PATTERN.matcher(line);

while (matcher.find()) {
// extract the bibtex-key(s) XXX from \citation{XXX} string
int len = matcher.end() - matcher.start();
if (len > 11) {
String str = matcher.group(2);
// could be an comma separated list of keys
String[] keys = str.split(",");
for (String dummyStr : keys) {
// delete all unnecessary blanks and save key into an set
uniqueKeys.add(dummyStr.trim());
}
}
while ((line = br.readLine()) != null) {
Matcher citeMatch = CITE_PATTERN.matcher(line);

while (citeMatch.find()) {
String keyString = citeMatch.group(2);
String[] keys = keyString.split(",");

for (String key : keys) {
uniqueKeys.add(key.trim());
}
// try to find a nested aux file
int index = line.indexOf("\\@input{");
if (index >= 0) {
int start = index + 8;
int end = line.indexOf('}', start);
if (end > start) {
String str = path + line.substring(index + 8, end);

// if filename already in file list
if (!fileList.contains(str)) {
fileList.add(str); // insert file into file list
}
}
}

Matcher inputMatch = INPUT_PATTERN.matcher(line);

while (inputMatch.find()) {
String inputString = citeMatch.group(2);
String inputFile = new File(filename).toPath().resolve(inputString).toString();

if (!fileList.contains(inputFile)) {
fileList.add(inputFile);
nestedAuxCount++;
}
} else {
cont = false;
}
}
nestedAuxCount++;
} catch (FileNotFoundException e) {
LOGGER.info("Cannot locate input file!", e);
LOGGER.info("Cannot locate input file", e);
} catch (IOException e) {
LOGGER.warn("Problem opening file!", e);
LOGGER.warn("Problem opening file", e);
}

fileIndex++; // load next file
fileIndex++;
}

return true;
}

/*
* Try to find an equivalent BibTeX entry inside the reference database for all keys inside the aux file.
*/
private void resolveTags() {
auxDatabase = new BibDatabase();
unresolvedKeys.clear();

// for all bibtex keys (found in aux-file) try to find an equivalent
// entry into reference database
for (String key : uniqueKeys) {
BibEntry entry = masterDatabase.getEntryByKey(key);

if (entry == null) {
unresolvedKeys.add(key);
} else {
insertEntry(entry);
// Check if the entry we just found references another entry which
// we don't already have in our list of entries to include. If so,
// pull in that entry as well:
entry.getFieldOptional("crossref").ifPresent(crossref -> {
if (!uniqueKeys.contains(crossref)) {
BibEntry refEntry = masterDatabase.getEntryByKey(crossref);

if (refEntry == null) {
unresolvedKeys.add(crossref);
} else {
insertEntry(refEntry);
crossRefEntriesCount++;
}
}
});

resolveCrossReferences(entry);
}
}

Expand All @@ -262,24 +184,41 @@ private void resolveTags() {
}

/*
* Copy the database's configuration, i.e., preamble and strings.
* Resolves and adds CrossRef entries
*/
private void copyDatabaseConfiguration() {
auxDatabase.setPreamble(masterDatabase.getPreamble());
Set<String> keys = masterDatabase.getStringKeySet();
for (String key : keys) {
BibtexString string = masterDatabase.getString(key);
auxDatabase.addString(string);
}
private void resolveCrossReferences(BibEntry entry) {
    // Looks at the "crossref" field of the given entry; nothing happens when it is absent.
    entry.getFieldOptional("crossref").ifPresent(referencedKey -> {
        // Keys contained in uniqueKeys are skipped here.
        if (uniqueKeys.contains(referencedKey)) {
            return;
        }

        BibEntry referencedEntry = masterDatabase.getEntryByKey(referencedKey);

        if (referencedEntry != null) {
            // Found in the master database: copy it over and count it as a cross-reference entry.
            insertEntry(referencedEntry);
            crossRefEntriesCount++;
        } else {
            // Not found in the master database: record the key as unresolved.
            unresolvedKeys.add(referencedKey);
        }
    });
}

/*
* Insert a clone of the given entry.
* The clone is given a new unique ID.
* Insert a clone of the given entry. The clone is given a new unique ID.
*/
private void insertEntry(BibEntry entry) {
    // A clone is inserted instead of the given instance.
    final BibEntry copy = (BibEntry) entry.clone();

    // The clone receives a freshly generated id before it is added to the aux database.
    final String freshId = IdGenerator.next();
    copy.setId(freshId);

    auxDatabase.insertEntry(copy);
}

/*
 * Transfers the configuration of the master database to the aux database:
 * the preamble and every BibTeX string.
 */
private void copyDatabaseConfiguration() {
    // preamble first
    auxDatabase.setPreamble(masterDatabase.getPreamble());

    // then each string, looked up by its key in the master database
    for (String stringKey : masterDatabase.getStringKeySet()) {
        auxDatabase.addString(masterDatabase.getString(stringKey));
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,5 @@ public void testNotAllFound() {

// TODO strings and preamble test
// TODO return type of generate during error should be false
// TODO resolve nested aux files
}

0 comments on commit da3c1c3

Please sign in to comment.