In [39]:
import json
import logging
import elasticsearch
from elasticsearch import helpers

In [10]:
def connect_elasticssearch():
    config = {
        'host': 'localhost',
        'port': '9200'
    }
    _es = elasticsearch.Elasticsearch([config,], timeout=300)
    if _es.ping():
        print('Conectado!')
    else:
        print('Não há conexão com o Elasticsearch')
    return _es

if __name__ == '__main__':
  logging.basicConfig(level=logging.ERROR)


In [11]:
def create_index(es_object, index_name):
    created = False
    #index settings
    settings = {
        "settings":{
            "number_of_shards": 1,
            "number_of_replicas": 0
        }
    }
    
    try:
        if not es_object.indices.exists(index_name):
                # Ignore 400 means to ignore "Index Already Exist" error.
                es_object.indices.create(index=index_name, body=settings)
                print('Created Index')
        created = True
    except Exception as ex:
        print(str(ex))
    finally:
            return created


In [14]:
def get_indices(es_object):
    return es_object.indices.get_alias("*")

In [24]:
def delete_all_indices(es_object):
    
    for key in es_object.indices.get_alias("*").keys():
        es_object.indices.delete(index=key)

In [64]:
def get_data_from_file(filename):
    data = [l.strip() for l in open(filename, encoding="utf8", errors='ignore')]
    return data

In [65]:
def bulk_insert_from_file(es_object, filename):
    data = get_data_from_file(filename)
    response = helpers.bulk(es_object, 
                            data,
                            index = 'accounts',
                            doc_type = '_doc'
                           )

In [73]:
def get_all_documents(es_object, index_name, size):
    
    body = {
    'size': size,
    'query': {
        'match_all': {}
        } 
    }

    response = es_object.search(index = index_name, doc_type = '_doc', body = body)

    return [doc for doc in response['hits']['hits']]

In [29]:
es_object = connect_elasticssearch()

Conectado!


In [30]:
create_index(es_object, 'accounts')

Created Index


True

In [31]:
get_indices(es_object)

{'accounts': {'aliases': {}}}

In [66]:
bulk_insert_from_file(es_object, 'accounts.json')

In [72]:
body = {
    'size': 1000,
    'query': {
        'match_all': {}
    } 
}

response = es_object.search(index = 'accounts', doc_type = '_doc', body = body)

for doc in response['hits']['hits']:
    print(doc)

{'_index': 'accounts', '_type': '_doc', '_id': 'MHNYanQBXWwj1Njsnvzq', '_score': 1.0, '_source': {'index': {'_id': '1'}}}
{'_index': 'accounts', '_type': '_doc', '_id': 'MXNYanQBXWwj1Njsnvzq', '_score': 1.0, '_source': {'account_number': 1, 'balance': 39225, 'firstname': 'Amber', 'lastname': 'Duke', 'age': 32, 'gender': 'M', 'address': '880 Holmes Lane', 'employer': 'Pyrami', 'email': 'amberduke@pyrami.com', 'city': 'Brogan', 'state': 'IL'}}
{'_index': 'accounts', '_type': '_doc', '_id': 'MnNYanQBXWwj1Njsnvzq', '_score': 1.0, '_source': {'index': {'_id': '6'}}}
{'_index': 'accounts', '_type': '_doc', '_id': 'M3NYanQBXWwj1Njsnvzq', '_score': 1.0, '_source': {'account_number': 6, 'balance': 5686, 'firstname': 'Hattie', 'lastname': 'Bond', 'age': 36, 'gender': 'M', 'address': '671 Bristol Street', 'employer': 'Netagy', 'email': 'hattiebond@netagy.com', 'city': 'Dante', 'state': 'TN'}}
{'_index': 'accounts', '_type': '_doc', '_id': 'NHNYanQBXWwj1Njsnvzq', '_score': 1.0, '_source': {'index'

In [71]:
response['hits']

{'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}