Skip to content

Commit

Permalink
Remove EnglishStemAnalyzer and use EnglishAnalyzer (#11301)
Browse files (browse the repository at this point in the history)
  • Loading branch information
LoayGhreeb committed May 17, 2024
1 parent 9fdc294 commit b12f65c
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 33 deletions.
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/logic/pdf/search/PdfIndexer.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,11 +16,11 @@
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
import org.jabref.model.pdf.search.EnglishStemAnalyzer;
import org.jabref.model.pdf.search.SearchFieldConstants;
import org.jabref.preferences.FilePreferences;

import com.google.common.annotations.VisibleForTesting;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexNotFoundException;
Expand Down Expand Up @@ -130,7 +130,7 @@ private void initializeIndexWriterAndReader(IndexWriterConfig.OpenMode mode) {
indexWriter = new IndexWriter(
indexDirectory,
new IndexWriterConfig(
new EnglishStemAnalyzer()).setOpenMode(mode));
new EnglishAnalyzer()).setOpenMode(mode));
} catch (IOException e) {
LOGGER.error("Could not initialize the IndexWriter", e);
// FIXME: This can also happen if another instance of JabRef is launched in parallel.
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/org/jabref/logic/pdf/search/PdfSearcher.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,11 +7,12 @@
import java.util.Optional;

import org.jabref.gui.LibraryTab;
import org.jabref.model.pdf.search.EnglishStemAnalyzer;
import org.jabref.model.pdf.search.PdfSearchResults;
import org.jabref.model.pdf.search.SearchResult;
import org.jabref.model.strings.StringUtil;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
Expand All @@ -31,7 +32,7 @@ public final class PdfSearcher {
private static final Logger LOGGER = LoggerFactory.getLogger(LibraryTab.class);

private final PdfIndexer indexer;
private EnglishStemAnalyzer englishStemAnalyzer = new EnglishStemAnalyzer();
private final Analyzer englishAnalyzer = new EnglishAnalyzer();

private PdfSearcher(PdfIndexer indexer) {
this.indexer = indexer;
Expand Down Expand Up @@ -65,7 +66,7 @@ public PdfSearchResults search(final String searchString, final int maxHits) thr
return new PdfSearchResults();
}
try (IndexReader reader = DirectoryReader.open(optionalIndexWriter.get())) {
Query query = new MultiFieldQueryParser(PDF_FIELDS, englishStemAnalyzer).parse(searchString);
Query query = new MultiFieldQueryParser(PDF_FIELDS, englishAnalyzer).parse(searchString);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs results = searcher.search(query, maxHits);
for (ScoreDoc scoreDoc : results.scoreDocs) {
Expand Down
25 changes: 0 additions & 25 deletions src/main/java/org/jabref/model/pdf/search/EnglishStemAnalyzer.java

This file was deleted.

10 changes: 7 additions & 3 deletions src/main/java/org/jabref/model/pdf/search/SearchResult.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,7 +7,9 @@

import org.jabref.model.entry.BibEntry;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -46,14 +48,16 @@ public SearchResult(IndexSearcher searcher, Query query, ScoreDoc scoreDoc) thro

Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query));

try (TokenStream contentStream = new EnglishStemAnalyzer().tokenStream(CONTENT, content)) {
try (Analyzer analyzer = new EnglishAnalyzer();
TokenStream contentStream = analyzer.tokenStream(CONTENT, content)) {
TextFragment[] frags = highlighter.getBestTextFragments(contentStream, content, true, 10);
this.contentResultStringsHtml = Arrays.stream(frags).map(TextFragment::toString).collect(Collectors.toList());
} catch (InvalidTokenOffsetsException e) {
this.contentResultStringsHtml = List.of();
}

try (TokenStream annotationStream = new EnglishStemAnalyzer().tokenStream(ANNOTATIONS, annotations)) {
try (Analyzer analyzer = new EnglishAnalyzer();
TokenStream annotationStream = analyzer.tokenStream(ANNOTATIONS, annotations)) {
TextFragment[] frags = highlighter.getBestTextFragments(annotationStream, annotations, true, 10);
this.annotationsResultStringsHtml = Arrays.stream(frags).map(TextFragment::toString).collect(Collectors.toList());
} catch (InvalidTokenOffsetsException e) {
Expand All @@ -62,7 +66,7 @@ public SearchResult(IndexSearcher searcher, Query query, ScoreDoc scoreDoc) thro
}

private String getFieldContents(IndexSearcher searcher, ScoreDoc scoreDoc, String field) throws IOException {
IndexableField indexableField = searcher.doc(scoreDoc.doc).getField(field);
IndexableField indexableField = searcher.storedFields().document(scoreDoc.doc).getField(field);
if (indexableField == null) {
return "";
}
Expand Down

0 comments on commit b12f65c

Please sign in to comment.