
Commit

Merge remote-tracking branch 'origin/main' into no_api_refresh_for_unpromotables
Tim-Brooks committed Jun 13, 2023
2 parents 7d6ebe0 + cba3e27 commit 1c713c7
Showing 190 changed files with 10,972 additions and 1,238 deletions.
org/elasticsearch/benchmark/tdigest/SortBench.java (new file)
@@ -0,0 +1,79 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* This project is based on a modification of https://github.com/tdunning/t-digest which is licensed under the Apache 2.0 License.
*/

package org.elasticsearch.benchmark.tdigest;

import org.elasticsearch.tdigest.Sort;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;

/** Explores the performance of Sort on pathological input data. */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 10, time = 3, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 20, time = 2, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@Threads(1)
@State(Scope.Thread)
public class SortBench {
    private final int size = 100000;
    private final double[] values = new double[size];

    @Param({ "0", "1", "-1" })
    public int sortDirection;

    @Setup
    public void setup() {
        Random prng = new Random(999983);
        for (int i = 0; i < size; i++) {
            values[i] = prng.nextDouble();
        }
        if (sortDirection > 0) {
            Arrays.sort(values);
        } else if (sortDirection < 0) {
            Arrays.sort(values);
            Sort.reverse(values, 0, values.length);
        }
    }

    @Benchmark
    public void quicksort() {
        int[] order = new int[size];
        for (int i = 0; i < size; i++) {
            order[i] = i;
        }
        Sort.sort(order, values, null, values.length);
    }
}
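The Sort.sort(order, values, null, values.length) call above performs an indirect sort: it orders an index array by the values those indices point to, leaving the values themselves in place. A minimal JDK-only sketch of the same pattern (illustrative only; the tdigest Sort class works on primitive arrays with its own quicksort):

import java.util.Arrays;
import java.util.Comparator;

public class IndirectSortExample {
    public static void main(String[] args) {
        double[] values = { 3.0, 1.0, 2.0, 1.5 };
        // Sort the indices by the values they reference; values is untouched.
        Integer[] order = { 0, 1, 2, 3 };
        Arrays.sort(order, Comparator.comparingDouble(i -> values[i]));
        System.out.println(Arrays.toString(order)); // prints [1, 3, 2, 0]
    }
}

Sorting indices rather than values is useful when several parallel arrays must be permuted consistently, which is presumably why the tdigest Sort signature accepts an extra array (passed as null in this benchmark).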
org/elasticsearch/benchmark/tdigest/TDigestBench.java (new file)
@@ -0,0 +1,131 @@
/*
* Licensed to Elasticsearch B.V. under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* This project is based on a modification of https://github.com/tdunning/t-digest which is licensed under the Apache 2.0 License.
*/

package org.elasticsearch.benchmark.tdigest;

import org.elasticsearch.tdigest.AVLTreeDigest;
import org.elasticsearch.tdigest.MergingDigest;
import org.elasticsearch.tdigest.TDigest;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.profile.GCProfiler;
import org.openjdk.jmh.profile.StackProfiler;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
@Fork(1)
@Threads(1)
@State(Scope.Thread)
public class TDigestBench {

    public enum TDigestFactory {
        MERGE {
            @Override
            TDigest create(double compression) {
                return new MergingDigest(compression, (int) (10 * compression));
            }
        },
        AVL_TREE {
            @Override
            TDigest create(double compression) {
                return new AVLTreeDigest(compression);
            }
        };

        abstract TDigest create(double compression);
    }

    @Param({ "100", "300" })
    double compression;

    @Param({ "MERGE", "AVL_TREE" })
    TDigestFactory tdigestFactory;

    @Param({ "NORMAL", "GAUSSIAN" })
    String distribution;

    Random random;
    TDigest tdigest;

    double[] data = new double[1000000];

    @Setup
    public void setUp() {
        random = ThreadLocalRandom.current();
        tdigest = tdigestFactory.create(compression);

        Supplier<Double> nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble();
        for (int i = 0; i < 10000; ++i) {
            tdigest.add(nextRandom.get());
        }

        for (int i = 0; i < data.length; ++i) {
            data[i] = nextRandom.get();
        }
    }

    @State(Scope.Thread)
    public static class ThreadState {
        int index = 0;
    }

    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.MICROSECONDS)
    public void add(ThreadState state) {
        if (state.index >= data.length) {
            state.index = 0;
        }
        tdigest.add(data[state.index++]);
    }

    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder().include(".*" + TDigestBench.class.getSimpleName() + ".*")
            .warmupIterations(5)
            .measurementIterations(5)
            .addProfiler(GCProfiler.class)
            .addProfiler(StackProfiler.class)
            .build();

        new Runner(opt).run();
    }
}
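The benchmark only measures add(), but the point of a t-digest is answering quantile and rank queries once populated. A hedged usage sketch, not part of this commit, assuming this fork keeps the upstream t-digest accessors quantile(q) and cdf(x):

import org.elasticsearch.tdigest.MergingDigest;
import org.elasticsearch.tdigest.TDigest;

import java.util.Random;

public class TDigestQueryExample {
    public static void main(String[] args) {
        // Same constructor shape as the benchmark: compression 100, buffer 10x.
        TDigest digest = new MergingDigest(100, 1000);
        Random rng = new Random(42);
        for (int i = 0; i < 100_000; i++) {
            digest.add(rng.nextGaussian());
        }
        // quantile(q) and cdf(x) are the upstream t-digest API, assumed unchanged here.
        System.out.println("median ~ " + digest.quantile(0.5)); // near 0.0
        System.out.println("P(x <= 1.0) ~ " + digest.cdf(1.0)); // near 0.84
    }
}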
@@ -61,6 +61,7 @@ public class InternalDistributionModuleCheckTaskProvider {
"org.elasticsearch.preallocate",
"org.elasticsearch.securesm",
"org.elasticsearch.server",
"org.elasticsearch.tdigest",
"org.elasticsearch.xcontent"
);

@@ -75,7 +76,7 @@ public class InternalDistributionModuleCheckTaskProvider {

private static final Function<ModuleReference, String> toName = mref -> mref.descriptor().name();

-private InternalDistributionModuleCheckTaskProvider() {};
+private InternalDistributionModuleCheckTaskProvider() {}

/** Registers the checkModules task, which contains all checks relevant to ES Java Modules. */
static TaskProvider<Task> registerCheckModulesTask(Project project, TaskProvider<Copy> checkExtraction) {
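For context on the fragment above: registerCheckModulesTask wires a verification task through Gradle's lazy TaskProvider API. A hypothetical minimal sketch of that registration pattern (names and wiring are illustrative; the real task also consumes the checkExtraction Copy output and asserts the expected module names):

import org.gradle.api.Project;
import org.gradle.api.Task;
import org.gradle.api.tasks.TaskProvider;

final class ModuleCheckSketch {
    private ModuleCheckSketch() {} // non-instantiable utility class, as above

    static TaskProvider<Task> registerCheckModulesTask(Project project) {
        // register(..) defers task creation until the task is actually needed.
        return project.getTasks().register("checkModules", task -> {
            task.setGroup("verification");
            task.doLast(t -> {
                // module assertions would run here, e.g. checking that
                // "org.elasticsearch.tdigest" is among the expected modules
            });
        });
    }
}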
5 changes: 5 additions & 0 deletions docs/changelog/94089.yaml
@@ -0,0 +1,5 @@
pr: 94089
summary: Add support for `xlm_roberta` tokenized models
area: Machine Learning
type: feature
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/96716.yaml
@@ -0,0 +1,5 @@
pr: 96716
summary: Feature/speed up binary vector decoding
area: Search
type: enhancement
issues: []
7 changes: 7 additions & 0 deletions docs/changelog/96777.yaml
@@ -0,0 +1,7 @@
pr: 96777
summary: Fixing `GeoIpDownloaderStatsAction$NodeResponse` serialization by defensively copying inputs
area: Ingest Node
type: bug
issues:
- 96438
5 changes: 5 additions & 0 deletions docs/changelog/96790.yaml
@@ -0,0 +1,5 @@
pr: 96790
summary: "[Profiling] Require POST to retrieve stacktraces"
area: Application
type: enhancement
issues: []
@@ -53,16 +53,16 @@ The response will look like this:
"aggregations": {
"load_time_ranks": {
"values": {
"500.0": 90.01,
"600.0": 100.0
"500.0": 55.0,
"600.0": 64.0
}
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
// TESTRESPONSE[s/"500.0": 90.01/"500.0": 55.00000000000001/]
// TESTRESPONSE[s/"600.0": 100.0/"600.0": 64.0/]
// TESTRESPONSE[s/"500.0": 55.0/"500.0": 55.00000000000001/]
// TESTRESPONSE[s/"600.0": 64.0/"600.0": 64.0/]

From this information you can determine you are hitting the 99% load time target but not quite
hitting the 95% load time target.
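To make the updated numbers concrete: a value of 55.0 under the 500.0 key means 55% of page loads completed within 500ms. A JDK-only sketch of the exact computation (illustrative; the aggregation itself uses an approximate digest, hence the 55.00000000000001 in the test response above):

import java.util.Arrays;

public class PercentileRankExample {
    // Percentage of observations less than or equal to the threshold.
    static double percentileRank(double[] loadTimesMillis, double threshold) {
        double[] sorted = loadTimesMillis.clone();
        Arrays.sort(sorted);
        int count = 0;
        while (count < sorted.length && sorted[count] <= threshold) {
            count++;
        }
        return 100.0 * count / sorted.length;
    }

    public static void main(String[] args) {
        double[] loads = { 120, 430, 480, 510, 550, 610, 640, 700, 950, 990 };
        System.out.println(percentileRank(loads, 500)); // 30.0
        System.out.println(percentileRank(loads, 600)); // 50.0
    }
}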
@@ -101,20 +101,20 @@
"values": [
{
"key": 500.0,
"value": 90.01
"value": 55.0
},
{
"key": 600.0,
"value": 100.0
"value": 64.0
}
]
}
}
}
--------------------------------------------------
// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
// TESTRESPONSE[s/"value": 90.01/"value": 55.00000000000001/]
// TESTRESPONSE[s/"value": 100.0/"value": 64.0/]
// TESTRESPONSE[s/"value": 55.0/"value": 55.00000000000001/]
// TESTRESPONSE[s/"value": 64.0/"value": 64.0/]


==== Script
5 changes: 5 additions & 0 deletions docs/reference/ml/ml-shared.asciidoc
@@ -944,6 +944,7 @@ values are
* `bert`: Use for BERT-style models
* `mpnet`: Use for MPNet-style models
* `roberta`: Use for RoBERTa-style and BART-style models
+* `xlm_roberta`: Use for XLMRoBERTa-style models
--
end::inference-config-nlp-tokenization[]

@@ -1026,6 +1027,10 @@ Tokenize with special tokens. The tokens typically included in MPNet-style token
--
end::inference-config-nlp-tokenization-mpnet-with-special-tokens[]

+tag::inference-config-nlp-tokenization-xlm-roberta[]
+experimental:[] XLMRoBERTa-style tokenization is to be performed with the enclosed settings.
+end::inference-config-nlp-tokenization-xlm-roberta[]

tag::inference-config-nlp-vocabulary[]
The configuration for retrieving the vocabulary of the model. The vocabulary is
then used at inference time. This information is usually provided automatically
Expand Down