Skip to content

Commit

Permalink
More resource efficient analysis wrapping usage
Browse files Browse the repository at this point in the history
Today, we take great care to share the same analyzer instances across shards and indices (global analyzers). The idea is that, by sharing the same analyzer, the thread-local resources it holds are not allocated per analyzer instance per thread.
The problem is that AnalyzerWrapper keeps its resources in its own per-thread storage, and with the per-field reuse strategy this causes token stream components to be allocated per field per thread. This is very evident with the StandardTokenizer, which uses a buffer...
This came out of a test with "many fields", where the majority of a 1GB heap was consumed by StandardTokenizer instances...
closes #6714
  • Loading branch information
kimchy committed Jul 3, 2014
1 parent 388fddb commit 5249005
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 104 deletions.

This file was deleted.

@@ -0,0 +1,66 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.lucene.analysis;

import java.io.Reader;

/**
 * An {@link AnalyzerWrapper} that lets subclasses pick the wrapped analyzer per field but
 * forbids them from wrapping the components or the reader: both hooks are {@code final}
 * here and simply defer to the superclass implementation.
 *
 * <p>Because nothing is wrapped, the reusable token stream components are looked up and
 * stored through the reuse strategy of the wrapped analyzer rather than being held a second
 * time by this wrapper. This addresses the per-field analyzer wrapper case, where keeping an
 * extra thread-local set of per-field components on the wrapper can become expensive memory
 * wise.
 */
public abstract class SimpleAnalyzerWrapper extends AnalyzerWrapper {

    /**
     * Creates the wrapper with a {@link DelegatingReuseStrategy} and then links that strategy
     * back to this instance.
     */
    public SimpleAnalyzerWrapper() {
        super(new DelegatingReuseStrategy());
        // "this" is not available while the super constructor arguments are evaluated, so the
        // back-reference is wired up only after super(...) has returned.
        DelegatingReuseStrategy strategy = (DelegatingReuseStrategy) getReuseStrategy();
        strategy.wrapper = this;
    }

    /**
     * Sealed against overriding; forwards to the superclass implementation.
     */
    @Override
    protected final TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        return super.wrapComponents(fieldName, components);
    }

    /**
     * Sealed against overriding; forwards to the superclass implementation.
     */
    @Override
    protected final Reader wrapReader(String fieldName, Reader reader) {
        return super.wrapReader(fieldName, reader);
    }

    /**
     * Reuse strategy that resolves the wrapped analyzer for the requested field and hands
     * both the lookup and the storage of reusable components over to that analyzer's own
     * reuse strategy.
     */
    private static class DelegatingReuseStrategy extends ReuseStrategy {

        // Assigned by the SimpleAnalyzerWrapper constructor right after construction.
        AnalyzerWrapper wrapper;

        /**
         * Fetches the reusable components from the wrapped analyzer's reuse strategy. The
         * {@code analyzer} argument is not used; the wrapped analyzer is passed on instead.
         */
        @Override
        public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
            Analyzer delegate = wrapper.getWrappedAnalyzer(fieldName);
            ReuseStrategy delegateStrategy = delegate.getReuseStrategy();
            return delegateStrategy.getReusableComponents(delegate, fieldName);
        }

        /**
         * Stores the reusable components through the wrapped analyzer's reuse strategy. The
         * {@code analyzer} argument is not used; the wrapped analyzer is passed on instead.
         */
        @Override
        public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) {
            Analyzer delegate = wrapper.getWrappedAnalyzer(fieldName);
            ReuseStrategy delegateStrategy = delegate.getReuseStrategy();
            delegateStrategy.setReusableComponents(delegate, fieldName, components);
        }
    }
}
Expand Up @@ -20,13 +20,13 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.elasticsearch.common.collect.ImmutableOpenMap;

/**
*
*/
public final class FieldNameAnalyzer extends AnalyzerWrapper {
public final class FieldNameAnalyzer extends SimpleAnalyzerWrapper {

private final ImmutableOpenMap<String, Analyzer> analyzers;

Expand All @@ -50,11 +50,6 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
return getAnalyzer(fieldName);
}

@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}

private Analyzer getAnalyzer(String name) {
Analyzer analyzer = analyzers.get(name);
if (analyzer != null) {
Expand Down
Expand Up @@ -20,13 +20,13 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CustomAnalyzerWrapper;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;

/**
* Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer}) that is associated
* with a name ({@link #name()}).
*/
public class NamedAnalyzer extends CustomAnalyzerWrapper {
public class NamedAnalyzer extends SimpleAnalyzerWrapper {

private final String name;
private final AnalyzerScope scope;
Expand All @@ -46,7 +46,6 @@ public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer) {
}

public NamedAnalyzer(String name, AnalyzerScope scope, Analyzer analyzer, int positionOffsetGap) {
super(analyzer.getReuseStrategy());
this.name = name;
this.scope = scope;
this.analyzer = analyzer;
Expand Down Expand Up @@ -79,11 +78,6 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
return this.analyzer;
}

@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}

@Override
public int getPositionIncrementGap(String fieldName) {
if (positionOffsetGap != Integer.MIN_VALUE) {
Expand Down
15 changes: 3 additions & 12 deletions src/main/java/org/elasticsearch/index/mapper/MapperService.java
Expand Up @@ -24,6 +24,7 @@
import com.google.common.collect.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.SimpleAnalyzerWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.queries.TermFilter;
Expand Down Expand Up @@ -976,7 +977,7 @@ public Analyzer searchQuoteAnalyzer() {
}
}

final class SmartIndexNameSearchAnalyzer extends AnalyzerWrapper {
final class SmartIndexNameSearchAnalyzer extends SimpleAnalyzerWrapper {

private final Analyzer defaultAnalyzer;

Expand Down Expand Up @@ -1005,14 +1006,9 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
}
return defaultAnalyzer;
}

@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}
}

final class SmartIndexNameSearchQuoteAnalyzer extends AnalyzerWrapper {
final class SmartIndexNameSearchQuoteAnalyzer extends SimpleAnalyzerWrapper {

private final Analyzer defaultAnalyzer;

Expand Down Expand Up @@ -1041,11 +1037,6 @@ protected Analyzer getWrappedAnalyzer(String fieldName) {
}
return defaultAnalyzer;
}

@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return components;
}
}

class InternalFieldMapperListener extends FieldMapperListener {
Expand Down

0 comments on commit 5249005

Please sign in to comment.