In [22]:
from pprint import pprint
from elasticsearch import Elasticsearch 

# Elasticsearch endpoint on localhost that every later cell talks to through `es`.
ES_URL = 'http://localhost:9200'
es = Elasticsearch(ES_URL)

# Ask the cluster for its metadata first; the confirmation is only printed
# once that call has returned.
client_info = es.info()
print('Connected to Elasticsearch!')
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'tdRTedevS6WZDfd1oEOoeQ',
 'name': 'd3f17afee3cf',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-01-10T10:08:26.972230187Z',
             'build_flavor': 'default',
             'build_hash': 'd4b391d925c31d262eb767b8b2db8f398103f909',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.12.0',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.17.1'}}


In [23]:
# Remove any 'products' index left over from an earlier run;
# `ignore_unavailable=True` is passed so a missing index is tolerated.
es.indices.delete(
    index='products',
    ignore_unavailable=True,
)
# Recreate the index with default settings. As the cell's last expression,
# the create response is what gets displayed.
es.indices.create(index='products')

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'products'})

In [24]:
# Minimal sample document: two string fields plus an ISO-formatted date string.
document = dict(
    title='title',
    text='text',
    created_on='2024-09-22',
)

# Index the sample document into 'products'. The payload is passed through the
# typed `document=` keyword rather than the raw `body=` escape hatch, which
# emits a DeprecationWarning on 8.0-8.11 clients.
response = es.index(index='products', document=document)
# Display the write response (generated id, result, shard summary).
response

ObjectApiResponse({'_index': 'products', '_id': 'Dxq085QB7xzWnT7WgYpY', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [25]:
# Show the interesting parts of the index response, one per line:
# outcome, shard summary, generated document id and target index.
for key in ("result", "_shards", "_id", "_index"):
    print(response[key])

created
{'total': 2, 'successful': 1, 'failed': 0}
Dxq085QB7xzWnT7WgYpY
products


#### Insert multiple documents 

In [26]:
import json 

# Read the sample documents inside a context manager so the file handle is
# closed as soon as parsing finishes (a bare `open()` passed to `json.load`
# is never closed explicitly). JSON is read as UTF-8 regardless of the
# platform's default encoding.
with open("data/dummy_data.json", encoding="utf-8") as dummy_file:
    dummy_data = json.load(dummy_file)
dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Here is another example of a document.',
  'created_on': '2024-09-24'},
 {'title': 'Sample Title 3',
  'text': 'The content of the third document goes here.',
  'created_on': '2024-09-24'}]

In [27]:
def insert_documents(document, index='products'):
    """Index a single document and return the client's response.

    Args:
        document: The document body to store (a JSON-serialisable mapping).
        index: Name of the target index. Defaults to 'products', which keeps
            existing one-argument calls working unchanged.

    Returns:
        Whatever `es.index` returns for the write.
    """
    # `document=` is the typed keyword for the payload; the raw `body=`
    # form emits a DeprecationWarning on 8.0-8.11 clients.
    return es.index(index=index, document=document)

def print_info(response):
    """Print a one-line summary of an index response.

    Reads `_id`, `result` and `_shards.total` from `response` and prints them
    in a fixed sentence.

    NOTE(review): the sentence says the document "is split into" N shards;
    per the Elasticsearch index API docs `_shards.total` appears to be the
    number of shard copies the write targets -- confirm and consider rewording.
    """
    doc_id = response['_id']
    outcome = response["result"]
    shard_total = response['_shards']['total']
    print(f"Document ID: {doc_id} is '{outcome}' and is split into {shard_total} shards. ")
    
# Index every sample document and report the outcome of each write.
# The loop uses its own names so the notebook-level `document` and `response`
# from the single-document cells are not silently overwritten (re-running
# those cells after this one would otherwise show different data).
for sample_document in dummy_data:
    sample_response = insert_documents(sample_document)
    print_info(sample_response)

Document ID: EBq085QB7xzWnT7WgYqm is 'created' and is split into 2 shards. 
Document ID: ERq085QB7xzWnT7WgYq1 is 'created' and is split into 2 shards. 
Document ID: Ehq085QB7xzWnT7WgYq9 is 'created' and is split into 2 shards. 


Print mapping

In [28]:
from pprint import pprint 

# Fetch the mapping currently attached to the 'products' index and show only
# its per-field definitions.
index_mapping = es.indices.get_mapping(index='products')
products_mappings = index_mapping["products"]["mappings"]
pprint(products_mappings["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


Manual mapping is generally not advised, but when it is used it should be defined before any data is added.

In [29]:
# Start again from an empty 'products' index: drop the old one (a missing
# index is tolerated via `ignore_unavailable=True`) and create a fresh one.
es.indices.delete(
    index='products',
    ignore_unavailable=True,
)
es.indices.create(
    index='products',
)

# Explicit field definitions for the index. 'text' and 'title' share the same
# shape (a `text` field with a `keyword` sub-field, `ignore_above` 256), so
# they are generated from one template; 'created_on' is a date.
mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        **{
            field_name: {
                'type': 'text',
                'fields': {
                    'keyword': {'type': 'keyword', 'ignore_above': 256},
                },
            }
            for field_name in ('text', 'title')
        },
    }
}

# Attach the field definitions to the already-created index. They are passed
# through the typed `properties=` keyword instead of a raw `body=` payload
# (which emits a DeprecationWarning on 8.0-8.11 clients); the request sent is
# the same.
es.indices.put_mapping(index='products', properties=mapping['properties'])

# Read the mapping back to confirm what the index now holds.
index_mapping = es.indices.get_mapping(index='products')
pprint(index_mapping["products"]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


In [30]:

def _text_with_keyword():
    """Return a fresh `text` field definition carrying a `keyword` sub-field."""
    return {
        'type': 'text',
        'fields': {
            'keyword': {'type': 'keyword', 'ignore_above': 256},
        },
    }


# Explicit field definitions: one date field and two identically shaped
# text fields.
mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        'text': _text_with_keyword(),
        'title': _text_with_keyword(),
    }
}


# Rebuild the index from scratch, this time handing the field definitions to
# `create` directly.
es.indices.delete(index='products', ignore_unavailable=True)
es.indices.create(
    index='products',
    mappings=mapping,
)

# Read the mapping back and show the per-field definitions.
index_mapping = es.indices.get_mapping(index='products')
created_properties = index_mapping["products"]["mappings"]["properties"]
pprint(created_properties)

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}
