
Commit

updated documentation and added readme
dvirsky committed Aug 11, 2016
1 parent eb14c4c commit 37ee3db
Showing 17 changed files with 1,468 additions and 84 deletions.
12 changes: 12 additions & 0 deletions LICENSE
@@ -0,0 +1,12 @@
Copyright (c) 2016, Redis Labs, Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 changes: 68 additions & 0 deletions README.md
@@ -0,0 +1,68 @@
# RediSearchBenchmarks

Source code for benchmarking RediSearch, a Redis module providing scalable, high-performance full-text search.

## What's in here

This is a Go application that can ingest data into a search engine and benchmark the throughput and latency of search and autocomplete queries against it.

It supports reading [Wikipedia Abstract Data Dumps](https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-abstract.xml) and indexing them in one of three search engines:

* [RediSearch](https://github.com/RedisLabsModules/RediSearch)
* [ElasticSearch](https://www.elastic.co/)
* [Solr](http://lucene.apache.org/solr/)

## Benchmark output

For each benchmark, we append a single line to a CSV file with the engine used, the benchmark title (type and queries), the concurrency, the throughput (requests per second) and the average latency (in milliseconds).

The default file name is `benchmark.csv`, and running the app with `-o -` will print the results to stdout instead.

The output of running a search benchmark on the queries "foo,bar,baz" with 4 concurrent clients looks like this:

```
redis,"search: foo,bar,baz",4,14997.81,0.27
```
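
Purely as an illustration (the helper name `readResults` and the printed format below are examples, not part of this repo), such a line can be read back with Go's `encoding/csv` package:

```
package main

import (
	"encoding/csv"
	"fmt"
	"os"
	"strconv"
)

// readResults parses lines of the form:
// engine, benchmark title, concurrency, throughput (req/s), average latency (ms)
func readResults(path string) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	records, err := csv.NewReader(f).ReadAll()
	if err != nil {
		return err
	}
	for _, rec := range records {
		throughput, _ := strconv.ParseFloat(rec[3], 64)
		latency, _ := strconv.ParseFloat(rec[4], 64)
		fmt.Printf("%s %q: %s clients, %.2f req/s, %.2f ms\n",
			rec[0], rec[1], rec[2], throughput, latency)
	}
	return nil
}

func main() {
	if err := readResults("benchmark.csv"); err != nil {
		panic(err)
	}
}
```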

## Usage

```
Usage of ./RediSearchBenchmark:
-benchmark string
[search|suggest] - if set, we run the given benchmark
-c int
benchmark concurrency (default 4)
-duration int
number of seconds to run the benchmark (default 5)
-engine string
[redis|elastic|solr] The search backend to run (default "redis")
-file string
Input file to ingest data from (wikipedia abstracts)
-fuzzy
For redis only - benchmark fuzzy auto suggest
-hosts string
comma separated list of host:port to redis nodes (default "localhost:6379")
-o string
results output file. set to - for stdout (default "benchmark.csv")
-queries string
comma separated list of queries to benchmark (default "hello world")
-scores string
read scores of documents CSV for indexing
-shards int
the number of partitions we want (AT LEAST the number of cluster shards) (default 1)
```

## Example: Indexing documents into RediSearch

```
./RediSearchBenchmark -engine redis -shards 4 -hosts "localhost:6379,localhost:6380,localhost:6381,localhost:6382" \
-file ~/wiki/enwiki-20160305-abstract.xml -scores ~/wiki/scores.csv
```

## Example: Benchmarking RediSearch with 32 concurrent clients

```
./RediSearchBenchmark -engine redis -shards 4 -hosts "localhost:6379,localhost:6380,localhost:6381,localhost:6382" \
-benchmark search -queries "hello world,foo bar" -c 32 -o out.csv
```

26 changes: 19 additions & 7 deletions benchmark.go
@@ -7,12 +7,15 @@ import (
"math/rand"
"os"
"sync"
"sync/atomic"
"time"

"github.com/RedisLabs/RediSearchBenchmark/index"
"github.com/RedisLabs/RediSearchBenchmark/query"
)

// SearchBenchmark returns a closure for the benchmarker to run, executing the given set of queries
// against the given index with the given options
func SearchBenchmark(queries []string, idx index.Index, opts interface{}) func() error {

counter := 0
@@ -25,6 +28,8 @@ func SearchBenchmark(queries []string, idx index.Index, opts interface{}) func()

}

// AutocompleteBenchmark returns a configured autocomplete benchmarking function to be run by
// the benchmarker
func AutocompleteBenchmark(ac index.Autocompleter, fuzzy bool) func() error {
counter := 0
sz := len(prefixes)
@@ -34,6 +39,13 @@ func AutocompleteBenchmark(ac index.Autocompleter, fuzzy bool) func() error {
return err
}
}

// Benchmark runs a given function f for the given duration, and outputs the throughput and latency of the function.
//
// It receives metadata like the engine we are running and the title of the specific benchmark, and writes these along
// with the results to a CSV file given by outfile.
//
// If outfile is "-", the results are written to stdout
func Benchmark(concurrency int, duration time.Duration, engine, title string, outfile string, f func() error) {

var out io.WriteCloser
@@ -48,14 +60,12 @@ func Benchmark(concurrency int, duration time.Duration, engine, title string, ou
defer out.Close()
}

// queries = []string{"weezer", "germany", "a", "music", "music of the spheres", "abba", "queen",
// "nirvana", "benjamin netanyahu", "redis", "redis labs", "german history"} // "computer science", "machine learning"}
//queries := []string{"earth Though is", "school etc"}
startTime := time.Now()
totalTime := time.Duration(0)
// the total time it took to run the functions, to measure average latency, in nanoseconds
var totalTime uint64
var total uint64
wg := sync.WaitGroup{}

total := 0
end := time.Now().Add(duration)

for i := 0; i < concurrency; i++ {
@@ -68,9 +78,10 @@
if err = f(); err != nil {
panic(err)
}
total++

totalTime += time.Since(tst)
// update the total requests performed and total time
atomic.AddUint64(&total, 1)
atomic.AddUint64(&totalTime, uint64(time.Since(tst)))

}
wg.Done()
@@ -81,6 +92,7 @@
avgLatency := (float64(totalTime) / float64(total)) / float64(time.Millisecond)
rate := float64(total) / (float64(time.Since(startTime)) / float64(time.Second))

// Output the results to CSV
w := csv.NewWriter(out)

err = w.Write([]string{engine, title,
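As a rough sketch of how these functions fit together (the variables `idx` and `ac` and the surrounding setup are assumed here, not part of this diff), a run might look like:

```
// Benchmark search with 32 concurrent clients for 5 seconds,
// writing a CSV line for the "redis" engine to stdout.
queries := []string{"hello world", "foo bar"}
Benchmark(32, 5*time.Second, "redis", "search: hello world,foo bar", "-",
	SearchBenchmark(queries, idx, nil))

// Benchmark exact (non-fuzzy) autocomplete the same way.
Benchmark(32, 5*time.Second, "redis", "suggest", "-",
	AutocompleteBenchmark(ac, false))
```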
5 changes: 5 additions & 0 deletions index/document.go
@@ -4,12 +4,15 @@ import (
"sort"
)

// Document represents a single document to be indexed or returned from a query.
// Besides a score and id, the Properties are completely arbitrary
type Document struct {
Id string
Score float32
Properties map[string]interface{}
}

// NewDocument creates a document with the specific id and score
func NewDocument(id string, score float32) Document {
return Document{
Id: id,
@@ -18,6 +21,7 @@ func NewDocument(id string, score float32) Document {
}
}

// Set sets a property and its value in the document
func (d Document) Set(name string, value interface{}) Document {
d.Properties[name] = value
return d
@@ -30,6 +34,7 @@ func (l DocumentList) Len() int { return len(l) }
func (l DocumentList) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func (l DocumentList) Less(i, j int) bool { return l[i].Score > l[j].Score } //reverse sorting

// Sort the DocumentList
func (l DocumentList) Sort() {
sort.Sort(l)
}
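For illustration, and assuming `DocumentList` is a slice of `Document` (as the methods above suggest), a document is built and sorted like this; the field names are arbitrary examples:

```
// Set returns the document, so property assignments can be chained.
doc := index.NewDocument("doc1", 0.75).
	Set("title", "Hello World").
	Set("body", "Lorem ipsum dolor sit amet")

// DocumentList sorts by descending score.
docs := index.DocumentList{doc, index.NewDocument("doc2", 0.25)}
docs.Sort()
```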
87 changes: 58 additions & 29 deletions index/elastic/elastic.go
@@ -2,6 +2,7 @@ package elastic

import (
"encoding/json"
"errors"
"net/http"
"time"

@@ -10,18 +11,21 @@ import (
"gopkg.in/olivere/elastic.v3"
)

// Index is an ElasticSearch index
type Index struct {
conn *elastic.Client

md *index.Metadata
name string
typ string
}

func NewIndex(addr, name string, md *index.Metadata) (*Index, error) {
// NewIndex creates a new ElasticSearch index with the given address and name. typ is the entity (document) type
func NewIndex(addr, name, typ string, md *index.Metadata) (*Index, error) {

client := &http.Client{
Transport: &http.Transport{
MaxIdleConnsPerHost: 500,
MaxIdleConnsPerHost: 200,
},
Timeout: 250 * time.Millisecond,
}
@@ -33,47 +37,65 @@ func NewIndex(addr, name string, md *index.Metadata) (*Index, error) {
conn: conn,
md: md,
name: name,
typ: typ,
}

return ret, nil

}

type mappingProperty map[string]interface{}

type mapping struct {
Properties map[string]mappingProperty `json:"properties"`
}

// fieldTypeString converts an index.FieldType to the corresponding elastic mapping type string
func fieldTypeString(f index.FieldType) (string, error) {
switch f {
case index.TextField:
return "string", nil
case index.NumericField:
return "double", nil
default:
return "", errors.New("Unsupported field type")
}
}

// Create creates the index and posts a mapping corresponding to our Metadata
func (i *Index) Create() error {

docMapping := `
{
"mappings": {
"doc":{
"properties":{
"body":{
"type":"string"
},
"title":{
"type":"string"
}
}
},
"autocomplete":{
"properties":{
"sugg":{
"type":"completion",
"payloads":true
}
}
}
doc := mapping{Properties: map[string]mappingProperty{}}
for _, f := range i.md.Fields {
doc.Properties[f.Name] = mappingProperty{}
fs, err := fieldTypeString(f.Type)
if err != nil {
return err
}
doc.Properties[f.Name]["type"] = fs
}
`

_, err := i.conn.CreateIndex(i.name).BodyJson(docMapping).Do()
// we currently manually create the autocomplete mapping
ac := mapping{
Properties: map[string]mappingProperty{
"sugg": mappingProperty{
"type": "completion",
"payloads": true,
},
},
}

mappings := map[string]mapping{
i.typ: doc,
"autocomplete": ac,
}

_, err := i.conn.CreateIndex(i.name).BodyJson(map[string]interface{}{"mappings": mappings}).Do()

return err
}

// Add indexes one entry in the index.
// TODO: Add support for multiple insertions
// Index indexes multiple documents
func (i *Index) Index(docs []index.Document, opts interface{}) error {

blk := i.conn.Bulk()
@@ -88,6 +110,8 @@ func (i *Index) Index(docs []index.Document, opts interface{}) error {
return err
}

// Search searches the index for the given query, returning the matching documents,
// the total number of results, and an error if something went wrong
func (i *Index) Search(q query.Query) ([]index.Document, int, error) {

eq := elastic.NewQueryStringQuery(q.Term)
@@ -116,12 +140,14 @@ func (i *Index) Search(q query.Query) ([]index.Document, int, error) {
return ret, int(res.TotalHits()), err
}

// Drop deletes the index
func (i *Index) Drop() error {
i.conn.DeleteIndex(i.name).Do()

return nil
}

// AddTerms adds suggestion terms to the suggester index
func (i *Index) AddTerms(terms ...index.Suggestion) error {
blk := i.conn.Bulk()

@@ -137,6 +163,9 @@ func (i *Index) AddTerms(terms ...index.Suggestion) error {
return err

}

// Suggest gets completion suggestions for a given prefix.
// TODO: fuzzy not supported yet
func (i *Index) Suggest(prefix string, num int, fuzzy bool) ([]index.Suggestion, error) {

s := elastic.NewCompletionSuggester("autocomplete").Field("sugg").Text(prefix).Size(num)
@@ -152,7 +181,7 @@ func (i *Index) Suggest(prefix string, num int, fuzzy bool) ([]index.Suggestion,

ret := make([]index.Suggestion, 0, len(opts))
for _, op := range opts {
ret = append(ret, index.Suggestion{op.Text, float64(op.Score)})
ret = append(ret, index.Suggestion{Term: op.Text, Score: float64(op.Score)})
}
return ret, nil
}
7 changes: 4 additions & 3 deletions index/elastic/elastic_test.go
@@ -11,12 +11,12 @@ import (
)

func TestIndex(t *testing.T) {
t.SkipNow()
//t.SkipNow()
// todo: run redisearch automatically
md := index.NewMetadata().AddField(index.NewTextField("title", 1.0)).
AddField(index.NewNumericField("score"))

idx, err := NewIndex("http://localhost:9200", "testung", md)
idx, err := NewIndex("http://localhost:9200", "testung", "doc", md)
assert.NoError(t, err)
assert.NoError(t, idx.Drop())
assert.NoError(t, idx.Create())
@@ -46,10 +46,11 @@ func TestIndex(t *testing.T) {
}

func TestSuggest(t *testing.T) {

md := index.NewMetadata().AddField(index.NewTextField("title", 1.0)).
AddField(index.NewNumericField("score"))

idx, err := NewIndex("http://localhost:9200", "testung", md)
idx, err := NewIndex("http://localhost:9200", "testung", "doc", md)
assert.NoError(t, err)
assert.NoError(t, idx.Drop())
assert.NoError(t, idx.Create())
2 changes: 2 additions & 0 deletions index/index.go
@@ -4,6 +4,8 @@ import (
"github.com/RedisLabs/RediSearchBenchmark/query"
)

// Index is the abstract representation of a search index we're working against.
// It is implemented for RediSearch, ElasticSearch and Solr.
type Index interface {
Index(documents []Document, options interface{}) error
Search(query.Query) (docs []Document, total int, err error)
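Because every engine sits behind this interface, callers can stay engine-agnostic. A minimal sketch (the helper name is illustrative only):

```
// ingestAndSearch works against any backend that implements index.Index.
func ingestAndSearch(idx index.Index, docs []index.Document, q query.Query) ([]index.Document, int, error) {
	if err := idx.Index(docs, nil); err != nil {
		return nil, 0, err
	}
	return idx.Search(q)
}
```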
