Skip to content

Commit

Permalink
[Analysis] Deprecate Standard Html Strip Analyzer in 6.x
Browse files Browse the repository at this point in the history
Backport elastic#26719 to 6.x

Closes elastic#4704

(cherry picked from commit 38b698d)
  • Loading branch information
johtani committed Jan 17, 2019
1 parent 9c8a387 commit e942d63
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 4 deletions.
10 changes: 9 additions & 1 deletion docs/reference/migration/migrate_6_0/analysis.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,12 @@ is not set. A deprecation warning will be issued when an analyzed text exceeds 1
[float]
==== `standard` filter has been deprecated
The `standard` token filter has been deprecated because it doesn't change anything in
the stream. It will be removed in the next major version.
the stream. It will be removed in the next major version.

[float]
==== Deprecated standard_html_strip analyzer

The `standard_html_strip` analyzer has been deprecated, and should be replaced
with a custom analyzer that combines the `standard` tokenizer, the `html_strip`
char_filter and the `lowercase` token filter.
Indices created using this analyzer will still be readable in Elasticsearch 7.0,
but it will not be possible to create new indices using it.
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ public List<ScriptContext<?>> getContexts() {
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);

// TODO remove in 8.0
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new);
Expand Down Expand Up @@ -320,6 +322,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ public StandardHtmlStripAnalyzer() {
super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}

/**
* Creates the analyzer with the given stop word set, which is handed to the superclass constructor.
*
* @param stopwords the stop words to pass to the superclass
* @deprecated since 6.7; new indices cannot be created with this analyzer in 7.0, and it will be removed in 8.0.
* Use a custom analyzer with the [standard] tokenizer, the [html_strip] char_filter and the [lowercase] filter instead.
*/
@Deprecated
StandardHtmlStripAnalyzer(CharArraySet stopwords) {
super(stopwords);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

package org.elasticsearch.analysis.common;

import org.apache.logging.log4j.LogManager;
import org.apache.lucene.analysis.CharArraySet;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
Expand All @@ -28,14 +30,24 @@

public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardHtmlStripAnalyzer> {

private static final DeprecationLogger DEPRECATION_LOGGER =
new DeprecationLogger(LogManager.getLogger(StandardHtmlStripAnalyzerProvider.class));

private final StandardHtmlStripAnalyzer analyzer;

/**
 * Builds the {@link StandardHtmlStripAnalyzer} for an index and emits a deprecation warning.
 * Stop words are parsed from {@code settings}, with an empty set supplied as the default.
 *
 * @param indexSettings settings of the index the analyzer belongs to; also provides the index creation version
 * @param env           environment passed to the stop word parser
 * @param name          name of the analyzer
 * @param settings      analyzer-level settings
 * @deprecated since 6.7; new indices cannot be created with this analyzer in 7.0, and it will be removed in 8.0
 */
@Deprecated
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    // No built-in stop words: the empty set is used as the default handed to the parser.
    CharArraySet configuredStopWords =
        Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, CharArraySet.EMPTY_SET);
    analyzer = new StandardHtmlStripAnalyzer(configuredStopWords);
    analyzer.setVersion(version);
    // Logged under a fixed key so the deprecation logger can de-duplicate repeated warnings.
    DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_html_strip_deprecation",
        "Deprecated analyzer [standard_html_strip] used, " +
            "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
Expand Down Expand Up @@ -116,4 +119,27 @@ public void testEdgeNGramNoDeprecationWarningPre6_4() throws IOException {
assertNotNull(tokenFilterFactory.create(tokenizer));
}
}


/**
 * Verifies that configuring an analyzer of type "standard_html_strip" emits the expected deprecation warning.
 * The index creation version is chosen at random between 5.0.0 and the current version.
 */
public void testStandardHtmlStripAnalyzerDeprecationWarning() throws IOException {
    Settings.Builder builder = Settings.builder();
    builder.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir());
    builder.put(IndexMetaData.SETTING_VERSION_CREATED,
        VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.CURRENT));
    builder.put("index.analysis.analyzer.custom_analyzer.type", "standard_html_strip");
    builder.putList("index.analysis.analyzer.custom_analyzer.stopwords", "a", "b");
    Settings settings = builder.build();

    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
    try (CommonAnalysisPlugin plugin = new CommonAnalysisPlugin()) {
        IndexAnalyzers indexAnalyzers = createTestAnalysis(idxSettings, settings, plugin).indexAnalyzers;
        Analyzer customAnalyzer = indexAnalyzers.get("custom_analyzer");
        NamedAnalyzer named = (NamedAnalyzer) customAnalyzer;
        // The wrapped analyzer must have been built by the deprecated provider.
        assertNotNull(named.analyzer());
        assertWarnings(
            "Deprecated analyzer [standard_html_strip] used, " +
                "replace it with a custom analyzer using [standard] tokenizer and [html_strip] char_filter, plus [lowercase] filter");
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,15 @@

---
"standard_html_strip":
- skip:
version: " - 6.99.99"
reason: only starting from version 7.x this throws an error
- do:
catch: /\[standard_html_strip\] analyzer is not supported for new indices, use a custom analyzer using \[standard\] tokenizer and \[html_strip\] char_filter, plus \[lowercase\] filter/
indices.analyze:
body:
text: <bold/> <italic/>
analyzer: standard_html_strip
- length: { tokens: 2 }
- match: { tokens.0.token: bold }
- match: { tokens.1.token: italic }

---
"pattern":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ public Analyzer getAnalyzer(String analyzer) throws IOException {
throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
}}
);
} else if ("standard_html_strip".equals(analyzer)) {
Logger logger = LogManager.getLogger(getClass());
DeprecationLogger deprecationLogger = new DeprecationLogger(logger);
deprecationLogger.deprecated("[standard_html_strip] analyzer is deprecated, use a custom analyzer using [standard] tokenizer " +
"and [html_strip] char_filter, plus [lowercase] filter");
}
return analyzerProvider.get(environment, analyzer).get();
}
Expand Down

0 comments on commit e942d63

Please sign in to comment.