In [3]:
from pprint import pprint
from elasticsearch import Elasticsearch

# Address of the local Elasticsearch node this notebook talks to.
es_url = 'http://localhost:9200'
es = Elasticsearch(es_url)

# Ask the cluster for its metadata; the banner below is only printed if
# this call returns without raising.
client_info = es.info()
print('Connected to Elasticsearch')
pprint(client_info.body)

Connected to Elasticsearch
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'ZLPKoMvyRwO3jn9eeAD8Ug',
 'name': '310ad8ef32ea',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-08-05T10:05:34.233336849Z',
             'build_flavor': 'default',
             'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.11.1',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.15.0'}}


# Update documents

In [4]:
# Start from a clean slate: drop the index if a previous run left one
# behind (ignore_unavailable=True is passed so a missing index is not an
# error), then create it again. The create response is the cell output.
index_name = 'my_index'
es.indices.delete(index=index_name, ignore_unavailable=True)
es.indices.create(index=index_name)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [5]:
import os
import json

# Load the sample documents. The context manager closes the file handle as
# soon as parsing finishes, and the encoding is pinned so the result does
# not depend on the platform's locale.
dummy_path = os.path.join(os.getcwd(), "data", "dummy.json")
with open(dummy_path, encoding="utf-8") as dummy_file:
    dummy_data = json.load(dummy_file)

# Index every document and remember the `_id` reported in each response;
# the cells below use these IDs to address individual documents.
document_ids = []
for document in dummy_data:
    # `document=` is the named parameter for the payload, in line with the
    # `doc=` / `script=` keywords used by the update calls in this notebook.
    response = es.index(index='my_index', document=document)
    document_ids.append(response['_id'])

document_ids

['C9jKLpMBnw3A4L5g-wRK', 'DNjKLpMBnw3A4L5g-wSR', 'DdjKLpMBnw3A4L5g-wSc']

## Edit fields

In [6]:
# Overwrite the title of the first indexed document with an update script.
# The new value is supplied through `params` and read as `params.title`
# inside the script source.
title_script = {
    "source": "ctx._source.title = params.title",
    "params": {"title": "New Title"},
}
response = es.update(index="my_index", id=document_ids[0], script=title_script)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 3,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


In [7]:
# Read the first document back to confirm the scripted title change.
first_id = document_ids[0]
response = es.get(index="my_index", id=first_id)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 3,
 '_source': {'created_on': '2024-09-22',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 2,
 'found': True}


In [25]:
# Same kind of edit without a script: pass a partial document containing
# only the field to change.
title_patch = {"title": "dummy"}
response = es.update(index="my_index", id=document_ids[0], doc=title_patch)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 12,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 10,
 'result': 'updated'}


In [26]:
# Fetch the first document again to inspect its current `_source`.
first_id = document_ids[0]
response = es.get(index="my_index", id=first_id)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 12,
 '_source': {'created_on': '2024-09-22',
             'new_value': 'dummy_value',
             'text': 'This is the first sample document text.',
             'title': 'dummy'},
 '_version': 10,
 'found': True}


## Add fields

In [8]:
# Add a new field to the first document with an update script.
# The value is passed through `params` rather than embedded in the script
# source, so the source text stays the same whatever value is written.
response = es.update(
    index="my_index",
    id=document_ids[0],
    script={
        "source": "ctx._source.new_field = params.new_field",
        "params": {
            "new_field": "dummy_value"
        }
    },
)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 4,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 3,
 'result': 'updated'}


In [9]:
# Read the first document back; `new_field` should now be in `_source`.
first_id = document_ids[0]
response = es.get(index="my_index", id=first_id)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 4,
 '_source': {'created_on': '2024-09-22',
             'new_field': 'dummy_value',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 3,
 'found': True}


In [16]:
# Add a field without a script: a partial document that names a field the
# stored document does not have yet.
extra_field = {"new_value_2": "dummy_value_2"}
response = es.update(index="my_index", id=document_ids[0], doc=extra_field)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 8,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 7,
 'result': 'updated'}


In [11]:
# Read the first document back; `new_value_2` should now be in `_source`.
first_id = document_ids[0]
response = es.get(index="my_index", id=first_id)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 5,
 '_source': {'created_on': '2024-09-22',
             'new_field': 'dummy_value',
             'new_value_2': 'dummy_value_2',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 4,
 'found': True}


## Remove fields

In [12]:
# Remove a field from the first document: the script calls remove() on
# ctx._source with the name of the field to drop.
drop_field_script = {"source": "ctx._source.remove('new_field')"}
response = es.update(
    index="my_index",
    id=document_ids[0],
    script=drop_field_script,
)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 6,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 5,
 'result': 'updated'}


In [13]:
# Read the first document back; `new_field` should be gone from `_source`.
first_id = document_ids[0]
response = es.get(index="my_index", id=first_id)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 6,
 '_source': {'created_on': '2024-09-22',
             'new_value_2': 'dummy_value_2',
             'text': 'This is the first sample document text.',
             'title': 'New Title'},
 '_version': 5,
 'found': True}


In [None]:
# One script can carry several statements: here it removes a field and
# rewrites the title in a single update request.
# The title value is supplied through `params` rather than embedded in the
# script source, so the source text stays the same whatever title is set.
response = es.update(
    index="my_index",
    id=document_ids[0],
    script={
        "source": """
            ctx._source.remove('new_value_2');
            ctx._source.title = params.title;
        """,
        "params": {
            "title": "Test"
        }
    },
)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 9,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 8,
 'result': 'updated'}


In [20]:
# Read the first document back to check both effects of the combined script.
first_id = document_ids[0]
response = es.get(index="my_index", id=first_id)
pprint(response.body)

{'_id': 'C9jKLpMBnw3A4L5g-wRK',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 9,
 '_source': {'created_on': '2024-09-22',
             'text': 'This is the first sample document text.',
             'title': 'Test'},
 '_version': 8,
 'found': True}


## Update or create

In [21]:
# Update-or-create: target an explicit id and pass doc_as_upsert=True so
# the partial document is used as the upsert document.
book_doc = {
    "book_id": 1234,
    "book_name": "A book",
}
response = es.update(
    index="my_index",
    id="1",
    doc=book_doc,
    doc_as_upsert=True,
)
pprint(response.body)

{'_id': '1',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 10,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 1,
 'result': 'created'}


In [23]:
# Ask the index how many documents it holds; the bare expression on the
# last line is what the cell displays.
response = es.count(index="my_index")
response["count"]

4