## Connection to ElasticSearch

In [5]:
from pprint import pprint
from elasticsearch import Elasticsearch
from elastic_transport import TransportError

In [6]:
from elasticsearch import Elasticsearch

es = Elasticsearch(
    "http://localhost:9200",
    headers={
        "Content-Type": "application/json",
        "Accept": "application/json"
    }
)

print(es.ping())

True


In [15]:
client_info = es.info()
pprint(client_info.body)

{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'kdgaRXuGS8yh7rxokb9Jwg',
 'name': 'eb3cb2c67bb9',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


## Create index

In [14]:
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index', settings={
    'index': {'number_of_shards': 1,
              'number_of_replicas': 1,}
})

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

## Inserting Documents

In [17]:
document = {
    'title': 'Elasticsearch Basics',
    'created_on': '2025-04-22',
    'author': 'John Doe',
    'content': 'This is a basic introduction to Elasticsearch.',
    'tags': ['elasticsearch', 'basics', 'introduction']
}

In [None]:
response = es.index(index='my_documents', body=document)
print(response)

{'_index': 'my_documents', '_id': 'b02ZX5YBSI5_NC425mvb', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}


In [20]:
response.body

{'_index': 'my_documents',
 '_id': 'b02ZX5YBSI5_NC425mvb',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 0,
 '_primary_term': 1}

In [21]:
index_mapping = es.indices.get_mapping(index='my_documents')
pprint(index_mapping['my_documents']['mappings']['properties'])

{'author': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
            'type': 'text'},
 'content': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
             'type': 'text'},
 'created_on': {'type': 'date'},
 'tags': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


In [24]:
pprint(index_mapping)

ObjectApiResponse({'my_documents': {'mappings': {'properties': {'author': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'content': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'created_on': {'type': 'date'}, 'tags': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'title': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}}})


## Field data types

In [27]:
es.indices.delete(index='binary_index', ignore_unavailable=True)
es.indices.create(
    index='binary_index',
    mappings={
        'properties':{
            'image_data':{
                'type': 'binary'
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'binary_index'})

In [29]:
import base64

with open('elasticsearch_datatypes.png', 'rb') as image_file:
    image_bytes = image_file.read()
    image_base64 = base64.b64encode(image_bytes).decode('utf-8')    

image_base64[:100] 

'iVBORw0KGgoAAAANSUhEUgAAAgAAAADkCAIAAABhUAsQAAAACXBIWXMAAAsTAAALEwEAmpwYAAAgAElEQVR4nOx9eXxURbb/qbq3'

In [30]:
len(image_base64)

75068

In [32]:
document = {
    'image_data': image_base64
}

response = es.index(index='binary_index', body=document)
print(response.body)

{'_index': 'binary_index', '_id': 'cU2rX5YBSI5_NC42HWsO', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}


## Object Types

In [34]:
es.indices.delete(index='object_index', ignore_unavailable=True)
es.indices.create(
    index = 'object_index',
    mappings = {
        'properties' : {
            'author': {
                'properties': {
                    'first_name': {
                        'type': 'text'
                    },
                    'last_name': {
                        'type': 'text'
                    }
                }
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'object_index'})

In [35]:
document = {
    'author':{
        'first_name': 'Ei Ei Ei ',
        'last_name': 'Mael'
    }
}

In [37]:
response = es.index(index='object_index', body=document)
response

ObjectApiResponse({'_index': 'object_index', '_id': 'c02zX5YBSI5_NC42FGuo', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

## Text search types

In [38]:
es.indices.delete(index='text_index', ignore_unavailable=True)
es.indices.create(
    index='text_index',
    mappings={
        'properties': {
            'email_body': {
                'type': 'text'
            }
        }
    }
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'text_index'})

In [40]:
document = {
    'email_body': 'This is a test email body. It contains some text data that we want to index in Elasticsearch.'
}

response = es.index(index='text_index', body=document)
print(response.body)

{'_index': 'text_index', '_id': 'dE24X5YBSI5_NC42V2sV', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
