In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch('http://localhost:9200')
client_info = es.info()
print('Connected to Elasticsearch!')
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'WpOI-sfBSXe9aaVkWGIQnQ',
 'name': '61929d733ddf',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-12-16T10:09:08.849001802Z',
             'build_flavor': 'default',
             'build_hash': 'd8972a71dbbd64ff17f2f4dba9ca2c3fe09fb100',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.2',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.3'}}


In [2]:
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index')

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [None]:
def insert_document(document):
    response = es.index(index='my_index', body=document)
    return response.body

import json
with open('../data/dummy_data.json') as f:
    documents = json.load(f)

doc_id = []
for doc in documents:
    response = insert_document(doc)
    doc_id.append(response['_id'])
pprint(doc_id)

['RKvP8JsBkibQBq1f61sS', 'RavP8JsBkibQBq1f61t2', 'RqvP8JsBkibQBq1f61uB']


## Update API

### 1. If documents exists in the index

#### 1.1 Update an existing field

In [11]:
response = es.update(index='my_index', id=doc_id[0], doc={
    "title": "Updated Title 1"
})
response.body

{'_index': 'my_index',
 '_id': 'RKvP8JsBkibQBq1f61sS',
 '_version': 3,
 'result': 'noop',
 '_shards': {'total': 0, 'successful': 0, 'failed': 0},
 '_seq_no': 4,
 '_primary_term': 1}

In [12]:
response = es.get(index='my_index', id=doc_id[0], ignore=[404])
response.body

  response = es.get(index='my_index', id=doc_id[0], ignore=[404])


{'_index': 'my_index',
 '_id': 'RKvP8JsBkibQBq1f61sS',
 '_version': 3,
 '_seq_no': 4,
 '_primary_term': 1,
 'found': True,
 '_source': {'title': 'Updated Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'}}

### 1.2 Scripted Updates

In [13]:
response = es.update(index='my_index', id=doc_id[0], script={
    "source" : "ctx._source.title = params.title",
    "params" : {
        "title" : "Updated Title 1"
    }
})
response.body

{'_index': 'my_index',
 '_id': 'RKvP8JsBkibQBq1f61sS',
 '_version': 4,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 5,
 '_primary_term': 1}

In [14]:
response = es.get(index='my_index', id=doc_id[0], ignore=[404])
response.body

  response = es.get(index='my_index', id=doc_id[0], ignore=[404])


{'_index': 'my_index',
 '_id': 'RKvP8JsBkibQBq1f61sS',
 '_version': 4,
 '_seq_no': 5,
 '_primary_term': 1,
 'found': True,
 '_source': {'title': 'Updated Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'}}

### 1.3 Add new Fields
This can be done using two methods either by `script` or `doc`

In [None]:

response1 = es.update(index='my_index', id=doc_id[0], doc={
    "author": "Purandeswaran"
})
response2 = es.update(index='my_index', id=doc_id[1], script={
    "source" : "ctx._source.author = params.author",
    "params" : {
        "author" : "Thandava"
    }
})

In [18]:
response1 = es.get(index='my_index', id=doc_id[0], ignore=[404])
response2 = es.get(index='my_index', id=doc_id[1], ignore=[404])
pprint(response1['_source'])
pprint(response2['_source'])

{'author': 'Purandeswaran',
 'created_on': '2024-09-22',
 'text': 'This is the first sample document text.',
 'title': 'Updated Title 1'}
{'author': 'Thandava',
 'created_on': '2024-09-24',
 'text': 'Here is another example of a document.',
 'title': 'Sample Title 2'}


  response1 = es.get(index='my_index', id=doc_id[0], ignore=[404])
  response2 = es.get(index='my_index', id=doc_id[1], ignore=[404])


### 1.4 Remove Fields

In [19]:
response = es.update(index='my_index', id=doc_id[0], script={
    "source": "ctx._source.remove('author' )"
})
response.body

{'_index': 'my_index',
 '_id': 'RKvP8JsBkibQBq1f61sS',
 '_version': 6,
 'result': 'updated',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 8,
 '_primary_term': 1}

In [20]:
response1 = es.get(index='my_index', id=doc_id[0], ignore=[404])
pprint(response1['_source'])

{'created_on': '2024-09-22',
 'text': 'This is the first sample document text.',
 'title': 'Updated Title 1'}


  response1 = es.get(index='my_index', id=doc_id[0], ignore=[404])


### 2. If documents doesn't exist in the index
We use `doc_as_upsert` to tell Elasticsearch that if the document does not exist, it should be inserted as a new document.

In [22]:
response = es.update(index='my_index', id=100, doc = {
    "title": "New Title",
    "text": "New Text Content"
    },
    doc_as_upsert=True
)
response.body

{'_index': 'my_index',
 '_id': '100',
 '_version': 1,
 'result': 'created',
 '_shards': {'total': 2, 'successful': 1, 'failed': 0},
 '_seq_no': 9,
 '_primary_term': 1}

In [23]:
doc_id.append(100)
response = es.get(index='my_index', id=100, ignore=[404])
pprint(response['_source'])

{'text': 'New Text Content', 'title': 'New Title'}


  response = es.get(index='my_index', id=100, ignore=[404])


In [24]:
count = es.count(index='my_index')
print(f'Total documents in my_index: {count.body["count"]}')

Total documents in my_index: 4
