Skip to content

Commit

Permalink
[Analysis] Deprecate Standard Html Strip Analyzer
Browse files Browse the repository at this point in the history
Deprecate only the Standard Html Strip Analyzer.
If a user creates an index with this analyzer in 7.0 or later, Elasticsearch throws an exception.
If the index was created before 7.0, Elasticsearch issues a deprecation log message instead.
The analyzer will be removed in 8.0.

Related elastic#4704
  • Loading branch information
johtani committed Nov 13, 2018
1 parent a18b599 commit 9990d17
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 4 deletions.
8 changes: 8 additions & 0 deletions docs/reference/migration/migrate_7_0/analysis.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,11 @@ instead.
==== `standard` filter has been removed

The `standard` token filter has been removed because it doesn't change anything in the stream.

[float]
==== Deprecated standard_html_strip analyzer

The `standard_html_strip` analyzer has been deprecated, and should be replaced
with a custom analyzer that combines the `standard` tokenizer, the `html_strip`
char_filter and the `lowercase` token filter.
Indexes created using this analyzer will still be readable in Elasticsearch 7.0,
but it will not be possible to create new indexes using it.
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ public List<ScriptContext<?>> getContexts() {
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);

// TODO remove in 8.0
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new);
Expand Down Expand Up @@ -321,6 +323,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase {
/**
 * Creates the analyzer with {@link EnglishAnalyzer#ENGLISH_STOP_WORDS_SET} as its stop word set.
 */
public StandardHtmlStripAnalyzer() {
super(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
}

/**
 * Creates the analyzer with a caller-supplied stop word set.
 *
 * @param stopwords stop words handed to the superclass constructor
 * @deprecated since 6.5; the analyzer can no longer be created in 7.0 and will be removed in 8.0
 */
@Deprecated
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
super(stopwords);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.CharArraySet;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
Expand All @@ -28,14 +31,30 @@

public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {

private static final DeprecationLogger DEPRECATION_LOGGER =
new DeprecationLogger(Loggers.getLogger(StandardHtmlStripAnalyzerProvider.class));

private final StandardHtmlStripAnalyzer analyzer;

/**
 * Builds the provider for the deprecated {@code standard_html_strip} analyzer.
 *
 * <p>The outcome depends on the version the index was created with:
 * <ul>
 *   <li>on or after 7.0.0-alpha1: construction is rejected with an {@link IllegalArgumentException};</li>
 *   <li>on or after 6.5.0 (and before 7.0.0-alpha1): the analyzer is built and a deprecation warning is logged;</li>
 *   <li>before 6.5.0: the analyzer is built without a warning.</li>
 * </ul>
 *
 * @param indexSettings settings of the index; supplies the version the index was created with
 * @param env environment passed to the stop word parser
 * @param name name under which the analyzer is configured
 * @param settings analyzer settings; stop words are parsed from them, defaulting to an empty set
 * @throws IllegalArgumentException if the index was created on or after 7.0.0-alpha1
 * @deprecated since 6.5; the analyzer can no longer be created in 7.0 and will be removed in 8.0
 */
@Deprecated
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final Version indexCreated = indexSettings.getIndexVersionCreated();
    // Fail fast: reject unsupported indices before parsing stop words or building the analyzer,
    // so the caller always sees this message rather than an unrelated stop word error.
    if (indexCreated.onOrAfter(Version.V_7_0_0_alpha1)) {
        throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
            "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
    }

    CharArraySet stopWords = Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET);
    analyzer = new StandardHtmlStripAnalyzer(stopWords);
    analyzer.setVersion(version);

    // Only reachable for pre-7.0 indices; warn for those created on or after 6.5.0.
    if (indexCreated.onOrAfter(Version.V_6_5_0)) {
        DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_html_strip_deprecation",
            "Deprecated analyzer [standard_html_strip] used, " +
            "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,21 @@

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;
import org.junit.Assert;

import java.io.IOException;
import java.io.StringReader;
Expand Down Expand Up @@ -116,4 +120,70 @@ public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
assertNotNull(tokenFilterFactory.create(tokenizer));
}
}


/**
 * Check that the deprecated analyzer name "standard_html_strip" throws an
 * {@link IllegalArgumentException} for indices created since 7.0.0.
 * Any other exception propagates unchanged so that the real failure cause stays visible.
 */
public void testStandardHtmlStripAnalyzerDeprecationError() throws IOException {
    Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .put(IndexMetaData.SETTING_VERSION_CREATED,
            VersionUtils.randomVersionBetween(random(), Version.V_7_0_0_alpha1, Version.CURRENT))
        .put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip")
        .putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b")
        .build();

    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
        // Building the analysis chain must fail; the result itself is not needed.
        createTestAnalysis(idxSettings, settings, commonAnalysisPlugin);
        fail("[standard_html_strip] is created");
    } catch (IllegalArgumentException iae) {
        // Expected value first, actual value second, so a failure report reads correctly.
        assertEquals("[standard_html_strip] analyzer is not supported for new indices, " +
            "use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter",
            iae.getMessage());
    }
}

/**
 * Verifies that configuring the deprecated "standard_html_strip" analyzer on an index whose
 * creation version is drawn between 6.5.0 and 6.6.0 still produces an analyzer and emits
 * the deprecation warning.
 */
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
    Settings.Builder builder = Settings.builder();
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir());
    builder.put(IndexMetaData.SETTING_VERSION_CREATED,
        VersionUtils.randomVersionBetween(random(), Version.V_6_5_0, Version.V_6_6_0));
    builder.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip");
    builder.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b");
    Settings settings = builder.build();

    IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings);
    try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
        IndexAnalyzers indexAnalyzers = createTestAnalysis(indexSettings, settings, plugin).indexAnalyzers;
        Analyzer custom = indexAnalyzers.get("custom_analyzer");
        NamedAnalyzer named = (NamedAnalyzer) custom;
        assertNotNull(named.analyzer());

        final String expectedWarning = "Deprecated analyzer [standard_html_strip] used, " +
            "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter";
        assertWarnings(expectedWarning);
    }
}

/**
 * Verifies that the deprecated "standard_html_strip" analyzer can still be created for indices
 * created before 6.5.0 (creation version drawn between 6.0.0-alpha1 and 6.4.0). The test calls
 * no warning assertion; it only checks that an analyzer is returned.
 */
public void testStandardHtmlStripAnalyzerNoDeprecationPre6_5() throws IOException {
    Settings.Builder builder = Settings.builder();
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir());
    builder.put(IndexMetaData.SETTING_VERSION_CREATED,
        VersionUtils.randomVersionBetween(random(), Version.V_6_0_0_alpha1, Version.V_6_4_0));
    builder.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip");
    builder.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b");
    Settings settings = builder.build();

    IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings);
    try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
        IndexAnalyzers indexAnalyzers = createTestAnalysis(indexSettings, settings, plugin).indexAnalyzers;
        Analyzer custom = indexAnalyzers.get("custom_analyzer");
        NamedAnalyzer named = (NamedAnalyzer) custom;
        assertNotNull(named.analyzer());
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,15 @@

---
"standard_html_strip":
    - skip:
        version: " - 6.99.99"
        reason: only starting from version 7.x this throws an error
    # The request is expected to fail, so only the error message is checked here.
    # Token assertions are omitted because an error response carries no tokens.
    - do:
        catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
        indices.analyze:
            body:
                text: <bold/> <italic/>
                analyzer: standard_html_strip

---
"pattern":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
Expand Down Expand Up @@ -51,6 +54,7 @@ public final class AnalysisRegistry implements Closeable {
public static final String INDEX_ANALYSIS_CHAR_FILTER = "index.analysis.char_filter";
public static final String INDEX_ANALYSIS_FILTER = "index.analysis.filter";
public static final String INDEX_ANALYSIS_TOKENIZER = "index.analysis.tokenizer";
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(AnalysisRegistry.class));
private final PrebuiltAnalysis prebuiltAnalysis;
private final Map<String, Analyzer> cachedAnalyzer = new ConcurrentHashMap<>();

Expand Down Expand Up @@ -130,7 +134,13 @@ public Analyzer getAnalyzer(String analyzer) throws IOException {
throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
}}
);
} else if ("standard_html_strip".equals(analyzer)) {
if (Version.CURRENT.onOrAfter(Version.V_7_0_0_alpha1)) {
throw new IllegalArgumentException("[standard_html_strip] analyzer is not supported for new indices, " +
"use a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}
}

return analyzerProvider.get(environment, analyzer).get();
}

Expand Down

0 comments on commit 9990d17

Please sign in to comment.