In [1]:
from elasticsearch import Elasticsearch, helpers
import pandas as pd
import logging

In [2]:
class Search():
    """Thin convenience wrapper around a single Elasticsearch index.

    Constructing an instance connects to the local cluster and *recreates*
    the index named ``index_name``: an existing index with that name is
    deleted first, so every instance starts from an empty index.

    NOTE(review): the ``doc_type`` arguments forwarded by the methods below
    rely on mapping types, which newer Elasticsearch clients no longer
    accept -- confirm against the installed client version.
    """

    def __init__(self, index_name):
        """Connect to Elasticsearch and (re)create ``index_name``.

        :param index_name: name of the index this wrapper operates on.
        """
        super().__init__()
        self.logger = logging.getLogger(__name__)
        # NOTE(review): host and credentials are hard-coded; move them to
        # configuration / environment variables before sharing this notebook.
        self.__es = Elasticsearch([{'scheme': 'http', 'host':'localhost','port':9200}], basic_auth=('admin', 'es_pw'))

        self.__index_name = index_name
        if self.__es.indices.exists(index=self.__index_name):
            self.logger.debug('Deleting existing index %s', self.__index_name)
            # Keyword argument, consistent with the other client calls here
            # (positional API arguments are deprecated by the client).
            self.__es.indices.delete(index=self.__index_name)

        self.__es.indices.create(index=self.__index_name)
        # Block until the cluster reports at least 'yellow' health.
        self.__es.cluster.health(wait_for_status='yellow')

    def index(self, type_name, id_value, content):
        """Index ``content`` under ``id_value`` with document type ``type_name``."""
        self.logger.debug('index %s/%s : %s', type_name, id_value, content)
        self.__es.index(index=self.__index_name, doc_type=type_name, id=id_value, body=content)

    def map(self, type_name, mapping):
        """Install ``mapping`` for document type ``type_name`` on the index."""
        self.logger.debug('map %s', type_name)
        self.__es.indices.put_mapping(index=self.__index_name, doc_type=type_name, body={type_name: mapping})

    def search(self, type_name, query={'match_all': {}}):
        """Run ``query`` against the index and return the client's response.

        :param type_name: document type to search.
        :param query: query clause placed under the ``'query'`` key of the
            request body; defaults to ``match_all``. The default dict is
            shared between calls and is never mutated here.
        """
        self.logger.debug('search %s : %s', type_name, query)
        return self.__es.search(index=self.__index_name, doc_type=type_name, body={'query': query})

    def get(self, type_name, id_value):
        """Fetch the document ``id_value`` and return the client's response."""
        self.logger.debug('get %s/%s', type_name, id_value)
        document = self.__es.get(index=self.__index_name, doc_type=type_name, id=id_value)
        # Lazy %-formatting: the response is not a str, so concatenating it
        # onto the message would raise TypeError.
        self.logger.debug('got document %s', document)
        return document

    def delete(self, type_name, id_value):
        """Delete the document ``id_value`` of type ``type_name``."""
        self.logger.debug('delete %s/%s', type_name, id_value)
        self.__es.delete(index=self.__index_name, doc_type=type_name, id=id_value)

    def optimize(self):
        """
        forcemerge allows removal of deleted documents and reducing the number of segments
        (documents are marked as tombstone [like cassandra] but not purged from the segment's
        index for performance reasons)
        """
        self.logger.debug('optimize')
        # forcemerge is an index-level API, exposed on ``client.indices``.
        self.__es.indices.forcemerge(index=self.__index_name)

    @property
    def es(self):
        """The underlying Elasticsearch client."""
        return self.__es

    def __eq__(self, other):
        """Two wrappers are equal when their underlying clients are equal."""
        if not isinstance(other, Search):
            # Let Python try the reflected comparison / identity fallback
            # instead of failing on a missing private attribute.
            return NotImplemented
        return self.__es == other.__es

    def __str__(self):
        return str(self.__es)

    def __hash__(self):
        # Kept consistent with __eq__: hash of the underlying client.
        return hash(self.__es)

In [3]:
# Connect and (re)create the 'typo_check' index. Search.__init__ deletes any
# existing index with this name before creating it, so re-running this cell
# discards previously indexed documents.
# `es` is the Search wrapper; the underlying Elasticsearch client is `es.es`.
es = Search(index_name='typo_check')

  
  if sys.path[0] == '':
  del sys.path[0]


In [4]:
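# Disabled bulk-load sketch. Before re-enabling, note that:
#   - `word_db` is not defined in the cells shown here;
#   - helpers.bulk() expects the Elasticsearch client (`es.es`), not the Search wrapper `es`;
#   - the target index 'word_typo' differs from the 'typo_check' index created above.
# doc = [{'_id': w} for w in word_db]
# helpers.bulk(es, doc, index='word_typo',doc_type='_doc', request_timeout=200)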