From 9990281e7d1d8be493e027482bf9b917732f9b21 Mon Sep 17 00:00:00 2001 From: Tobias Diez Date: Wed, 6 Apr 2016 15:58:17 +0200 Subject: [PATCH 1/3] Improve performance of db write by reworking FieldContentParser --- .../jabref/exporter/LatexFieldFormatter.java | 11 +++--- .../fileformat/FieldContentParser.java | 38 +++++++++---------- .../jabref/logic/util/strings/StringUtil.java | 7 +--- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/main/java/net/sf/jabref/exporter/LatexFieldFormatter.java b/src/main/java/net/sf/jabref/exporter/LatexFieldFormatter.java index d0280fce1ea..47159ff51b4 100644 --- a/src/main/java/net/sf/jabref/exporter/LatexFieldFormatter.java +++ b/src/main/java/net/sf/jabref/exporter/LatexFieldFormatter.java @@ -15,15 +15,16 @@ */ package net.sf.jabref.exporter; -import net.sf.jabref.*; +import java.util.ArrayList; +import java.util.List; + +import net.sf.jabref.Globals; +import net.sf.jabref.JabRefPreferences; import net.sf.jabref.bibtex.InternalBibtexFields; import net.sf.jabref.gui.GUIGlobals; import net.sf.jabref.importer.fileformat.FieldContentParser; import net.sf.jabref.logic.util.strings.StringUtil; -import java.util.ArrayList; -import java.util.List; - /** * Currently the only implementation of net.sf.jabref.exporter.FieldFormatter *

@@ -171,7 +172,7 @@ private String formatAndResolveStrings(String content, String fieldName) { } } - return parser.format(stringBuilder.toString(), fieldName); + return parser.format(stringBuilder, fieldName); } private boolean shouldResolveStrings(String fieldName) { diff --git a/src/main/java/net/sf/jabref/importer/fileformat/FieldContentParser.java b/src/main/java/net/sf/jabref/importer/fileformat/FieldContentParser.java index 1ee89f3bde3..ed522086cf6 100644 --- a/src/main/java/net/sf/jabref/importer/fileformat/FieldContentParser.java +++ b/src/main/java/net/sf/jabref/importer/fileformat/FieldContentParser.java @@ -15,23 +15,27 @@ */ package net.sf.jabref.importer.fileformat; +import java.util.HashSet; +import java.util.List; +import java.util.regex.Pattern; + import net.sf.jabref.Globals; import net.sf.jabref.JabRefPreferences; import net.sf.jabref.logic.util.strings.StringUtil; -import java.util.ArrayList; -import java.util.List; - /** * This class provides the reformatting needed when reading BibTeX fields formatted * in JabRef style. The reformatting must undo all formatting done by JabRef when * writing the same fields. */ public class FieldContentParser { - private final List multiLineFields; + private final HashSet multiLineFields; + + // 's' matches a space, tab, new line, carriage return. + private static final Pattern WHITESPACE = Pattern.compile("\\s+"); public FieldContentParser() { - multiLineFields = new ArrayList<>(); + multiLineFields = new HashSet<>(); // the following two are also coded in net.sf.jabref.exporter.LatexFieldFormatter.format(String, String) multiLineFields.add("abstract"); multiLineFields.add("review"); @@ -43,27 +47,21 @@ public FieldContentParser() { /** * Performs the reformatting * - * @param text2 StringBuffer containing the field to format. bibtexField contains field name according to field - * @param bibtexField - * @return The formatted field content. The StringBuffer returned may or may not be the same as the argument given. + * @param fieldContent the content to format + * @param bibtexField the name of the bibtex field + * @return the formatted field content. */ - public StringBuilder format(StringBuilder text2, String bibtexField) { - - // Unify line breaks - String text = StringUtil.unifyLineBreaksToConfiguredLineBreaks(text2.toString()); + public String format(String fieldContent, String bibtexField) { - // Do not format multiline fields if (multiLineFields.contains(bibtexField)) { - return new StringBuilder(text); + // Unify line breaks + return StringUtil.unifyLineBreaksToConfiguredLineBreaks(fieldContent); } - // 's' matches a space, tab, new line, carriage return. - text = text.replaceAll("\\s+", " "); - - return new StringBuilder(text); + return WHITESPACE.matcher(fieldContent).replaceAll(" "); } - public String format(String content, String bibtexField) { - return format(new StringBuilder(content), bibtexField).toString(); + public String format(StringBuilder fieldContent, String bibtexField) { + return format(fieldContent.toString(), bibtexField); } } diff --git a/src/main/java/net/sf/jabref/logic/util/strings/StringUtil.java b/src/main/java/net/sf/jabref/logic/util/strings/StringUtil.java index 04f92bf0679..8700af7f8b1 100644 --- a/src/main/java/net/sf/jabref/logic/util/strings/StringUtil.java +++ b/src/main/java/net/sf/jabref/logic/util/strings/StringUtil.java @@ -15,15 +15,12 @@ */ package net.sf.jabref.logic.util.strings; -import net.sf.jabref.Globals; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.StringTokenizer; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.base.CharMatcher; +import net.sf.jabref.Globals; public class StringUtil { From 1c4accb842fb25468aaa535793d0f43b54120217 Mon Sep 17 00:00:00 2001 From: Tobias Diez Date: Wed, 6 Apr 2016 16:53:05 +0200 Subject: [PATCH 2/3] Improve performance of search --- .../net/sf/jabref/benchmarks/Benchmarks.java | 9 +++-- .../search/rules/ContainBasedSearchRule.java | 38 ++++++++----------- .../net/sf/jabref/model/entry/BibEntry.java | 15 +++----- 3 files changed, 26 insertions(+), 36 deletions(-) diff --git a/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java b/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java index bf37277dac6..a21e09ab291 100644 --- a/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java +++ b/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java @@ -26,7 +26,7 @@ @State(Scope.Thread) public class Benchmarks { - StringReader bibtexStringReader; + String bibtexString; BibDatabase database = new BibDatabase(); @Setup @@ -34,7 +34,7 @@ public void init() throws IOException, SaveException { Globals.prefs = JabRefPreferences.getInstance(); Random randomizer = new Random(); - for (int i = 0; i < 1000; i++) { + for (int i = 0; i < 100000; i++) { BibEntry entry = new BibEntry(); entry.setCiteKey("id" + i); entry.setField("title", "This is my title " + i); @@ -50,12 +50,13 @@ public void init() throws IOException, SaveException { databaseWriter.writePartOfDatabase(stringWriter, new BibDatabaseContext(database, new MetaData(), new Defaults()), database.getEntries(), new SavePreferences()); - String bibtexString = stringWriter.toString(); - bibtexStringReader = new StringReader(bibtexString); + bibtexString = stringWriter.toString(); + } @Benchmark public ParserResult parse() throws IOException { + StringReader bibtexStringReader = new StringReader(bibtexString); BibtexParser parser = new BibtexParser(bibtexStringReader); return parser.parse(); } diff --git a/src/main/java/net/sf/jabref/logic/search/rules/ContainBasedSearchRule.java b/src/main/java/net/sf/jabref/logic/search/rules/ContainBasedSearchRule.java index 1bc86bd34f5..ece90034db4 100644 --- a/src/main/java/net/sf/jabref/logic/search/rules/ContainBasedSearchRule.java +++ b/src/main/java/net/sf/jabref/logic/search/rules/ContainBasedSearchRule.java @@ -15,6 +15,7 @@ */ package net.sf.jabref.logic.search.rules; +import java.util.Iterator; import java.util.List; import net.sf.jabref.logic.layout.format.RemoveLatexCommands; @@ -50,35 +51,28 @@ public boolean applyRule(String query, BibEntry bibEntry) { searchString = searchString.toLowerCase(); } - List words = new SentenceAnalyzer(searchString).getWords(); + List unmatchedWords = new SentenceAnalyzer(searchString).getWords(); - // We need match for all words: - boolean[] matchFound = new boolean[words.size()]; - - for (String field : bibEntry.getFieldNames()) { - if (bibEntry.hasField(field)) { - String fieldContent = ContainBasedSearchRule.REMOVE_LATEX_COMMANDS.format(bibEntry.getField(field)); - if (!caseSensitive) { - fieldContent = fieldContent.toLowerCase(); - } - - int index = 0; - // Check if we have a match for each of the query words, ignoring - // those words for which we already have a match: - for (String word : words) { - matchFound[index] = matchFound[index] || fieldContent.contains(word); + for (String fieldContent : bibEntry.getFieldValues()) { + String formattedFieldContent = ContainBasedSearchRule.REMOVE_LATEX_COMMANDS.format(fieldContent); + if (!caseSensitive) { + formattedFieldContent = formattedFieldContent.toLowerCase(); + } - index++; + Iterator unmatchedWordsIterator = unmatchedWords.iterator(); + while (unmatchedWordsIterator.hasNext()) { + String word = unmatchedWordsIterator.next(); + if(formattedFieldContent.contains(word)) { + unmatchedWordsIterator.remove(); } } - } - for (boolean aMatchFound : matchFound) { - if (!aMatchFound) { - return false; // Didn't match all words. + if(unmatchedWords.isEmpty()) { + return true; } } - return true; // Matched all words. + + return false; // Didn't match all words. } } diff --git a/src/main/java/net/sf/jabref/model/entry/BibEntry.java b/src/main/java/net/sf/jabref/model/entry/BibEntry.java index d38921f604a..3091ca2fd1e 100644 --- a/src/main/java/net/sf/jabref/model/entry/BibEntry.java +++ b/src/main/java/net/sf/jabref/model/entry/BibEntry.java @@ -24,16 +24,7 @@ import java.text.ParseException; import java.text.ParsePosition; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.TreeSet; +import java.util.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -602,4 +593,8 @@ public void addKeywords(List keywords) { public List getSeparatedKeywords() { return net.sf.jabref.model.entry.EntryUtil.getSeparatedKeywords(this.getField("keywords")); } + + public Collection getFieldValues() { + return fields.values(); + } } From b39c657c2713a6d0bbf52549848304e58249b909 Mon Sep 17 00:00:00 2001 From: Tobias Diez Date: Wed, 6 Apr 2016 17:17:41 +0200 Subject: [PATCH 3/3] Small performance improvements in parser --- .../net/sf/jabref/benchmarks/Benchmarks.java | 2 +- .../importer/fileformat/BibtexParser.java | 30 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java b/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java index a21e09ab291..49b92ae10b4 100644 --- a/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java +++ b/src/jmh/java/net/sf/jabref/benchmarks/Benchmarks.java @@ -34,7 +34,7 @@ public void init() throws IOException, SaveException { Globals.prefs = JabRefPreferences.getInstance(); Random randomizer = new Random(); - for (int i = 0; i < 100000; i++) { + for (int i = 0; i < 1000; i++) { BibEntry entry = new BibEntry(); entry.setCiteKey("id" + i); entry.setField("title", "This is my title " + i); diff --git a/src/main/java/net/sf/jabref/importer/fileformat/BibtexParser.java b/src/main/java/net/sf/jabref/importer/fileformat/BibtexParser.java index b39e3554acb..b5c18a1589b 100644 --- a/src/main/java/net/sf/jabref/importer/fileformat/BibtexParser.java +++ b/src/main/java/net/sf/jabref/importer/fileformat/BibtexParser.java @@ -299,17 +299,12 @@ private void parseBibtexString() throws IOException { * @return the text read so far */ private String dumpTextReadSoFarToString() { - StringBuilder entry = new StringBuilder(); - while (!pureTextFromFile.isEmpty()) { - entry.append(pureTextFromFile.pollFirst()); - } - - String result = entry.toString(); - int indexOfAt = entry.indexOf("@"); + String result = getPureTextFromFile(); + int indexOfAt = result.indexOf("@"); // if there is no entry found, simply return the content (necessary to parse text remaining after the last entry) if (indexOfAt == -1) { - return purgeEOFCharacters(entry); + return purgeEOFCharacters(result); } else { //skip all text except newlines and whitespaces before first @. This is necessary to remove the file header @@ -332,10 +327,17 @@ private String dumpTextReadSoFarToString() { } } - result = result.substring(runningIndex + 1); + return result.substring(runningIndex + 1); + } + } - return result; + private String getPureTextFromFile() { + StringBuilder entry = new StringBuilder(); + while (!pureTextFromFile.isEmpty()) { + entry.append(pureTextFromFile.pollFirst()); } + + return entry.toString(); } /** @@ -343,10 +345,10 @@ private String dumpTextReadSoFarToString() { * * @return a String without eof characters */ - private String purgeEOFCharacters(StringBuilder input) { + private String purgeEOFCharacters(String input) { StringBuilder remainingText = new StringBuilder(); - for (Character character : input.toString().toCharArray()) { + for (Character character : input.toCharArray()) { if (!(isEOFCharacter(character))) { remainingText.append(character); } @@ -441,7 +443,7 @@ private int read() throws IOException { int character = pushbackReader.read(); if(! isEOFCharacter(character)) { - pureTextFromFile.offerLast(Character.valueOf((char) character)); + pureTextFromFile.offerLast((char) character); } if (character == '\n') { line++; @@ -454,7 +456,7 @@ private void unread(int character) throws IOException { line--; } pushbackReader.unread(character); - if(pureTextFromFile.getLast().charValue() == character) { + if(pureTextFromFile.getLast() == character) { pureTextFromFile.pollLast(); } }