Commit 52edf32: es doc

Martin Kubajda committed Apr 4, 2019
1 parent 9ad4914

Showing 7 changed files with 125 additions and 38 deletions.
7 changes: 7 additions & 0 deletions bspump/elasticsearch/__init__.py
@@ -3,3 +3,10 @@
from .source import ElasticSearchSource, ElasticSearchAggsSource
from .lookup import ElasticSearchLookup

__all__ = [
"ElasticSearchConnection",
"ElasticSearchSink",
"ElasticSearchSource",
"ElasticSearchAggsSource",
"ElasticSearchLookup"
]
26 changes: 25 additions & 1 deletion bspump/elasticsearch/connection.py
@@ -17,6 +17,31 @@


class ElasticSearchConnection(Connection):
"""
ElasticSearchConnection allows your ES source, sink or lookup to connect to ElasticSearch instance
usage:
1. adding connection to PumpService
.. code:: python
svc = app.get_service("bspump.PumpService")
svc.add_connection(
bspump.elasticsearch.ElasticSearchConnection(app, "ESConnection")
)
2. pass connection name ("ESConnection" in our example) to relevant BSPump's object:
.. code:: python
self.build(
bspump.kafka.KafkaSource(app, self, "KafkaConnection"),
bspump.elasticsearch.ElasticSearchSink(app, self, "ESConnection")
)
"""

ConfigDefaults = {
'url': 'http://localhost:9200/',  # Could be multiline; each line is a URL to a node in the ElasticSearch cluster
@@ -29,7 +54,6 @@ class ElasticSearchConnection(Connection):
'allowed_bulk_response_codes': '201',
}
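As the comment on 'url' notes, the connection can target a whole cluster: list one URL per node, one per line. A minimal sketch of passing such a config (the node addresses are illustrative):

.. code:: python

	svc.add_connection(
		bspump.elasticsearch.ElasticSearchConnection(app, "ESConnection", config={
			'url': 'http://es01:9200/\nhttp://es02:9200/\nhttp://es03:9200/',
		})
	)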


def __init__(self, app, connection_id, config=None):
super().__init__(app, connection_id, config=config)

41 changes: 27 additions & 14 deletions bspump/elasticsearch/lookup.py
@@ -1,28 +1,43 @@
import abc
import requests
import logging
import json

from ..abc.lookup import MappingLookup

L = logging.getLogger(__name__)


class ElasticSearchLookup(MappingLookup):

"""
The lookup that is linked with ES.
It provides a mapping (dictionary-like) interface to pipelines.
It feeds lookup data from ES using a query.
It also has a simple cache to reduce the number of database hits.

**configs**

*index* - Elastic's index

*key* - field name to match

*scroll_timeout* - Timeout of a single scroll request (default is '1m'). Allowed time units:
https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units

Example:

.. code:: python

	class ProjectLookup(bspump.elasticsearch.ElasticSearchLookup):

		async def count(self, database):
			return await database['projects'].count_documents({})

		def find_one(self, database, key):
			return database['projects'].find_one({'_id': key})
"""

ConfigDefaults = {
'index': '', # Specify an index
@@ -44,8 +59,6 @@ def __init__(self, app, lookup_id, es_connection, config=None):
metrics_service = app.get_service('asab.MetricsService')
self.CacheCounter = metrics_service.create_counter("es.lookup", tags={}, init_values={'hit': 0, 'miss': 0})



def _find_one(self, key):
prefix = '_search'
request = {
5 changes: 4 additions & 1 deletion bspump/elasticsearch/sink.py
@@ -15,6 +15,10 @@
#

class ElasticSearchSink(Sink):
"""
ElasticSearchSink allows you to insert events into ElasticSearch through POST requests
"""


ConfigDefaults = {
@@ -24,7 +28,6 @@ class ElasticSearchSink(Sink):
"rollover_mechanism": 'time',
"max_index_size": 30*1024*1024*1024, #This is 30GB
"timeout": 30,

}
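A sketch of overriding these defaults when constructing the sink (the values are illustrative; the keys come from the ConfigDefaults above):

.. code:: python

	sink = bspump.elasticsearch.ElasticSearchSink(
		app, self, "ESConnection", config={
			'rollover_mechanism': 'time',
			'max_index_size': 10*1024*1024*1024,  # 10 GB instead of the default 30 GB
			'timeout': 60,
		}
	)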


79 changes: 57 additions & 22 deletions bspump/elasticsearch/source.py
@@ -10,10 +10,37 @@

class ElasticSearchSource(TriggerSource):
"""
request_body - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html
scroll_timeout - Timeout of single scroll request. Allowed time units:
ElasticSearchSource is using standard Elastic's search API to fetch data.
**configs**
*index* - Elastic's index (default is 'index-``*``').
*scroll_timeout* - Timeout of single scroll request (default is '1m'). Allowed time units:
https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
**specific pamameters**
*paging* - boolean (default is True)
*request_body* - dictionary described by Elastic's doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html
Default is:
.. code:: python
default_request_body = {
'query': {
'bool': {
'must': {
'match_all': {}
}
}
},
}
"""

ConfigDefaults = {
@@ -30,8 +57,6 @@ def __init__(self, app, pipeline, connection, request_body=None, paging=True, id
self.ScrollTimeout = self.Config['scroll_timeout']
self.Paging = paging


#print("index", self.Index)
if request_body is not None:
self.RequestBody = request_body
else:
Expand All @@ -44,7 +69,6 @@ def __init__(self, app, pipeline, connection, request_body=None, paging=True, id
}
}}


async def cycle(self):

scroll_id = None
@@ -91,7 +115,32 @@ async def cycle(self):

class ElasticSearchAggsSource(TriggerSource):
"""
request_body - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html
ElasticSearchAggsSource is used for Elastic's search aggregations.
**configs**
*index*: - Elastic's index (default is 'index-``*``').
**specific pamameters**
*request_body*
dictionary described by Elastic's doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-body.html
Default is:
.. code:: python
default_request_body = {
'query': {
'bool': {
'must': {
'match_all': {}
}
}
},
}
"""

ConfigDefaults = {
@@ -117,9 +166,6 @@ def __init__(self, app, pipeline, connection, request_body=None, id=None, config
}
}




async def cycle(self):
request_body = self.RequestBody
path = '{}/_search?'.format(self.Index)
@@ -140,7 +186,6 @@ async def cycle(self):

aggs = msg['aggregations']


if len(aggs) == 0:
return

@@ -150,9 +195,6 @@ async def cycle(self):
path = {}
await self.process_aggs(path, start_name, start)




async def process_aggs(self, path, aggs_name, aggs):

if 'buckets' in aggs:
@@ -166,15 +208,14 @@ async def process_aggs(self, path, aggs_name, aggs):
await self.process(event)
path.pop(aggs_name)




async def process_buckets(self, path, parent, buckets):
'''
Recursive function for processing buckets.
It iterates through the keys of the dictionary, looking for 'buckets' or 'value'.
If there are 'buckets', it calls itself; if there is a 'value', it calls process_aggs,
which sends an event to be processed.
'''

for bucket in buckets:
@@ -183,9 +224,3 @@ async def process_buckets(self, path, parent, buckets):
path[parent] = bucket[k]
elif isinstance(bucket[k], dict):
await self.process_aggs(path, k, bucket[k])
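For orientation, the nesting these two methods walk follows Elastic's standard aggregation response format, sketched below (the names and values are illustrative, not output of this code):

.. code:: python

	aggregations = {
		'by_host': {
			'buckets': [
				{
					'key': 'host-a',
					'doc_count': 12,
					'max_duration': {'value': 42.0}  # a leaf carrying 'value'
				}
			]
		}
	}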
1 change: 1 addition & 0 deletions doc/source/conf.py
@@ -25,6 +25,7 @@
import bspump
import bspump.kafka
import bspump.influxdb
import bspump.elasticsearch

# -- Project information -----------------------------------------------------

4 changes: 4 additions & 0 deletions doc/source/index.rst
@@ -22,6 +22,10 @@ Welcome to BSPump reference documentation!
:members:
:undoc-members:

.. automodule:: bspump.elasticsearch
:members:
:undoc-members:

Indices and tables
==================

