Updated event format (elastic#17)
* Initial format update

* Working version

* Added agent string

* Fixed JSON formatting bug

* Store reference data between invokations

* Add missing file

* Updated dashboards

* Added missing file

* Updated fieldstats method name

* WIP

* Corrected Discover query

* Updated following review

* Fixed data format

* Fixed format error in reference data
cdahlqvist committed May 31, 2018
1 parent 0974124 commit 115128c
Showing 47 changed files with 3,794 additions and 259 deletions.
104 changes: 73 additions & 31 deletions README.md
@@ -17,6 +17,8 @@ eventdata.url = https://github.com/elastic/rally-eventdata-track

The track can be run by specifying the following runtime parameters: `--track=eventdata` and `--track-repository=eventdata`

Another option is to download the repository and point to it using the `--track-path` command line parameter.
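A typical invocation might look like the following (illustrative only; the `--challenge` value is one of the challenges listed below):

```shell
esrally --track=eventdata --track-repository=eventdata --challenge=elasticlogs-1bn-load
```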

## Available Challenges

### 1) append-no-conflicts
@@ -35,7 +37,7 @@ This challenge shows how shard sizing can be performed and how the nature of que

### 4) elasticlogs-1bn-load

This challenge indexes 1 billion events into a number of indices of 2 primary shards each, and results in around 200GB of indices being generated on disk. This can vary depending on the environment. It can be used to give an idea of how maximum indexing performance behaves over an extended period of time.

### 5) elasticlogs-querying

@@ -51,74 +53,114 @@ In this challenge rate-limited indexing at varying levels is combined with a fix

### elasticlogs\_bulk\_source

This parameter source generates bulk indexing requests filled with auto-generated data. This data is generated based on statistics from a subset of real traffic to the elastic.co website. Data has been anonymised and post-processed, and is modelled on the format used by the Filebeat Nginx Module.

The generator allows data to be generated in real-time or against a set date/time interval. A sample event will contain the following fields:

```
{
  "@timestamp": "2017-06-01T00:01:08.866644Z",
  "offset": 7631775,
  "user_name": "-",
  "source": "/usr/local/var/log/nginx/access.log",
  "fileset": {
    "module": "nginx",
    "name": "access"
  },
  "input": {
    "type": "log"
  },
  "beat": {
    "version": "6.3.0",
    "hostname": "web-EU-1.elastic.co",
    "name": "web-EU-1.elastic.co"
  },
  "prospector": {
    "type": "log"
  },
  "nginx": {
    "access": {
      "user_agent": {
        "major": "44",
        "os": "Mac OS X",
        "os_major": "10",
        "name": "Firefox",
        "os_name": "Mac OS X",
        "device": "Other"
      },
      "remote_ip": "5.134.208.0",
      "remote_ip_list": [
        "5.134.208.0"
      ],
      "geoip": {
        "continent_name": "Europe",
        "city_name": "Grupa",
        "country_name": "Poland",
        "country_iso_code": "PL",
        "location": {
          "lat": 53.5076,
          "lon": 18.6358
        }
      },
      "referrer": "https://www.elastic.co/guide/en/marvel/current/getting-started.html",
      "url": "/guide/en/kibana/current/images/autorefresh-pause.png",
      "body_sent": {
        "bytes": 2122
      },
      "method": "GET",
      "response_code": "200",
      "http_version": "1.1"
    }
  }
}
```
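To illustrate how the nested structure above is navigated, the following Python sketch loads such an event and pulls out a few commonly queried fields (the event literal is abbreviated to just the fields it accesses):

```python
import json

# Illustrative: an abbreviated event in the Filebeat Nginx Module
# format shown above (only the fields accessed below are kept).
raw = """
{
  "@timestamp": "2017-06-01T00:01:08.866644Z",
  "beat": {"hostname": "web-EU-1.elastic.co"},
  "nginx": {
    "access": {
      "remote_ip": "5.134.208.0",
      "response_code": "200",
      "geoip": {"country_iso_code": "PL"}
    }
  }
}
"""

event = json.loads(raw)
access = event["nginx"]["access"]

# Nested objects map to dotted field names in Elasticsearch,
# e.g. nginx.access.geoip.country_iso_code.
print(access["remote_ip"])                  # 5.134.208.0
print(access["geoip"]["country_iso_code"])  # PL
print(event["beat"]["hostname"])            # web-EU-1.elastic.co
```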

### elasticlogs\_kibana\_source

This parameter source supports simulating two different types of dashboards.

**traffic** - This dashboard contains 7 visualisations and presents different types of traffic statistics. In structure it is similar to the `Nginx Overview` dashboard that comes with the Filebeat Nginx Module. It does aggregate across all records in the index and is therefore a quite 'heavy' dashboard.

![Eventdata traffic dashboard](eventdata/dashboards/images/eventdata_traffic_dashboard.png)

**content\_issues** - This dashboard contains 6 visualisations and is designed to be used for analysis of records with a 404 response code, e.g. to find dead internal links or external links that are no longer leading anywhere. This only aggregates across a small subset of the records in an index and is therefore considerably 'lighter' than the **traffic** dashboard.

![Eventdata content issues dashboard](eventdata/dashboards/images/eventdata_content_issues_dashboard.png)

**discover** - This simulates querying data through the `Discover` application in Kibana.

## Extending and adapting

This track can be used as is, but was designed to be easy to extend and modify. There are two directories, **operations** and **challenges**, containing files with the standard components of this track that can be used as examples. The main **track.json** file automatically loads all files with a *.json* suffix from these directories. This makes it simple to add new operations and challenges without having to update or modify any of the original files.
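The auto-loading behaviour can be sketched as follows (an illustrative Python sketch of the idea, not Rally's actual loader; directory names match the track layout described above):

```python
import glob
import json
import os


def load_components(track_dir):
    """Collect every *.json file from the operations/ and challenges/
    directories, the way track.json pulls in its standard components.
    Illustrative sketch only, not Rally's real implementation."""
    components = {"operations": [], "challenges": []}
    for section in components:
        pattern = os.path.join(track_dir, section, "*.json")
        for path in sorted(glob.glob(pattern)):
            with open(path) as f:
                components[section].append(json.load(f))
    return components
```

Dropping a new challenge file into **challenges/** is then enough for it to be picked up without touching any existing file.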

## Elasticsearch Compatibility

This track requires Elasticsearch 6.x. Earlier versions are not supported.

## Versioning Scheme

From time to time, setting and mapping formats change in Elasticsearch. As we want to be able to support multiple versions of Elasticsearch, we also need to version track specifications. Therefore, this repository contains multiple branches. The following examples should give you an idea how the versioning scheme works:

- master: tracks on this branch are compatible with the latest development version of Elasticsearch
- 6: compatible with all Elasticsearch 6.x releases.
- 2: compatible with all Elasticsearch releases with the major release number 2 (e.g. 2.1, 2.2, 2.2.1)
- 1.7: compatible with all Elasticsearch releases with the major release number 1 and minor release number 7 (e.g. 1.7.0, 1.7.1, 1.7.2)

As you can see, branches can match exact release numbers, but Rally is also lenient in case settings and mapping formats did not change for a few releases. Rally will try to match in the following order:

1. major.minor.patch-extension_label (e.g. 6.0.0-alpha2)
2. major.minor.patch (e.g. 6.2.3)
3. major.minor (e.g. 6.2)
4. major (e.g. 6)
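The fallback order above can be sketched in Python (an illustrative helper, not Rally's actual implementation; branch names are hypothetical):

```python
def best_branch(version, branches):
    """Pick the most specific branch name matching `version`, trying
    major.minor.patch-label, major.minor.patch, major.minor, then
    major. Illustrative sketch of the matching order, not Rally's
    real code."""
    base, _, label = version.partition("-")
    parts = base.split(".")  # e.g. "6.2.3" -> ["6", "2", "3"]
    candidates = []
    if label:
        candidates.append(version)          # 6.0.0-alpha2
    candidates.append(base)                  # 6.2.3
    candidates.append(".".join(parts[:2]))   # 6.2
    candidates.append(parts[0])              # 6
    for candidate in candidates:
        if candidate in branches:
            return candidate
    return "master"  # fall back to the development branch


print(best_branch("6.2.4", {"master", "6"}))  # -> 6
```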

Apart from that, the master branch is always considered to be compatible with the Elasticsearch master branch.

To specify the version to check against, add `--distribution-version` when running Rally. If it is not specified, Rally assumes that you want to benchmark against the Elasticsearch master version.

Example: If you want to benchmark Elasticsearch 6.2.4, run the following command:

```
esrally --distribution-version=6.2.4
```

How to Contribute
@@ -133,7 +175,7 @@ License

This software is licensed under the Apache License, version 2 ("ALv2"), quoted below.

Copyright 2015-2018 Elasticsearch <https://www.elastic.co>

Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
4 changes: 2 additions & 2 deletions eventdata/challenges/bulk-size-evaluation.json
@@ -1,5 +1,5 @@
{% set p_bulk_indexing_clients = (bulk_indexing_clients | default(8)) %}
{% set p_replica_count = (number_of_replicas | default(0)) %}
{% set p_shard_count = (shard_count | default(2)) %}

{
@@ -77,4 +77,4 @@
"clients": {{ p_bulk_indexing_clients }}
}
]
}
4 changes: 2 additions & 2 deletions eventdata/challenges/combined-indexing-and-querying.json
@@ -16,7 +16,7 @@
"operation": "deleteindex_elasticlogs_i-*"
},
{
"operation": "fieldstats_elasticlogs_q-*",
"warmup-iterations": {{ p_client_count }},
"iterations": {{ p_client_count }},
"clients": {{ p_client_count }}
@@ -131,4 +131,4 @@
"iterations": 1
}
]
}
14 changes: 9 additions & 5 deletions eventdata/challenges/elasticlogs-querying.json
@@ -3,19 +3,19 @@
"description": "This challenge simulates a set of Kibana queries against historical data (elasticlogs_q-* indices) without any indexing taking place for a period of 30 minutes. It assumes one of the challenges creating elasticlogs_q-* indices has been run.",
"meta": {
"benchmark_type": "querying",
"target_kibana_queries_per_minute": 5
},
"schedule": [
{
"operation": "fieldstats_elasticlogs_q-*",
"iterations": 1,
"clients": 4
},
{
"parallel": {
"warmup-time-period": 0,
"time-period": 1800,
"clients": 4,
"tasks": [
{
"operation": "relative-kibana-content_issues-dashboard_50%",
@@ -28,9 +28,13 @@
{
"operation": "relative-kibana-traffic-dashboard_50%",
"target-interval": 40
},
{
"operation": "relative-kibana-discover_50%",
"target-interval": 60
}
]
}
}
]
}
2 changes: 1 addition & 1 deletion eventdata/challenges/generate-historic-data.json
@@ -22,4 +22,4 @@
"clients": {{ p_bulk_indexing_clients }}
}
]
}
14 changes: 3 additions & 11 deletions eventdata/challenges/shard-size-on-disk.json
@@ -8,18 +8,10 @@
},
"schedule": [
{
"operation": "deleteindex_elasticlogs"
},
{
"operation": "create_elasticlogs_index"
},
{% for n in range(1,50) %}
{
@@ -56,4 +48,4 @@
}
}
]
}
14 changes: 3 additions & 11 deletions eventdata/challenges/shard-sizing.json
@@ -10,18 +10,10 @@
},
"schedule": [
{
"operation": "deleteindex_elasticlogs"
},
{
"operation": "create_elasticlogs_index"
},
{% for n in range(1, p_shard_sizing_iterations) %}
{
@@ -118,4 +110,4 @@
}
}
]
}
30 changes: 30 additions & 0 deletions eventdata/challenges/test.json
@@ -0,0 +1,30 @@
{
"name": "test",
"description": "This challenge simulates a set of Kibana queries against historical data (elasticlogs_q-* indices) as well as against the most recent data currently being indexed. It combines this with rate-limited indexing at varying levels. It assumes one of the challenges creating elasticlogs_q-* indices has been run.",
"meta": {
"benchmark_type": "indexing/querying",
"target_kibana_queries_per_minute": 7
},
"schedule": [
{
"operation": "deleteindex_elasticlogs_i-*"
},
{
"operation": "create_elasticlogs_i_write",
"clients": 1,
"warmup-iterations": 0,
"iterations": 1
},
{
"operation": "index-append-1000-elasticlogs_i_write",
"warmup-time-period": 0,
"time-period": 30,
"target-throughput": 2,
"clients": 2
},
{
"operation": "indicesstats_elasticlogs_i-*",
"iterations": 1
}
]
}
23 changes: 23 additions & 0 deletions eventdata/challenges/test2.json
@@ -0,0 +1,23 @@
{% set p_bulk_indexing_clients = (bulk_indexing_clients | default(8)) %}

{
"name": "elasticlogs-100m-load",
"description": "Indexes 100m documents into the elasticlogs index. IDs are autogenerated by Elasticsearch, meaning there are no conflicts.",
"meta": {
"client_count": {{ p_bulk_indexing_clients }},
"benchmark_type": "indexing"
},
"schedule": [
{
"operation": "deleteindex_elasticlogs"
},
{
"operation": "create_elasticlogs_index"
},
{
"operation": "index-append-1000-shard-sizing",
"iterations": 100000,
"clients": {{ p_bulk_indexing_clients }}
}
]
}
