In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

es=Elasticsearch("http://localhost:9200")
client_info = es.info()
print("Connected to Elasticsearch!")
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'es-docker-cluster',
 'cluster_uuid': '43JLb8LZRh2z6LwR5bBHMw',
 'name': 'elasticsearch',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2024-12-11T12:08:05.663969764Z',
             'build_flavor': 'default',
             'build_hash': '2b6a7fed44faa321997703718f07ee0420804b41',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '9.12.0',
             'minimum_index_compatibility_version': '7.0.0',
             'minimum_wire_compatibility_version': '7.17.0',
             'number': '8.17.0'}}


# Create index

In this method, the mappings which define the structure of documents within an index are infered automatically

In [2]:
es.indices.delete(index="my_index",ignore_unavailable=True)
es.indices.create(index="my_index")

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

`Shards`: Elasticsearch divides the data in an index into multiple shards. Each shard is a self-contained index that Elasticsearch can distribute across multiple nodes in a cluster. Shards are managed automatically but configured when creating the index.

`Replicas`: For fault tolerance and high availability, an index can have replica shards, which are copies of the primary shards.

In [3]:
es.indices.delete(index="my_index",ignore_unavailable=True)
es.indices.create(
    index="my_index",
    settings={
        "index": {
            "number_of_shards":3,
        "number_of_replicas":2
        },
    },
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [4]:
document={
    "title":"title",
    "text":"text",
    "created_on": "2024-09-22"
}
response=es.index(index="my_index",body=document)
response

ObjectApiResponse({'_index': 'my_index', '_id': 'U2zIk5wBQmTch9OjYwVx', '_version': 1, 'result': 'created', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [5]:
print(response['result'])
print(response["_shards"])
print(response["_id"])
print(response["_index"])

created
{'total': 3, 'successful': 1, 'failed': 0}
U2zIk5wBQmTch9OjYwVx
my_index


In [6]:
import json

dummy_data = json.load(open("../data/dummy_data.json"))
dummy_data

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Here is another example of a document.',
  'created_on': '2024-09-24'},
 {'title': 'Sample Title 3',
  'text': 'The content of the third document goes here.',
  'created_on': '2024-09-24'}]

In [7]:
def insert_document(document):
    response=es.index(index="my_index",body=document)
    return response 

def print_info(response):
    print(f"""Document ID: {response['_id']} is '{
          response["result"]}' and is split into {response['_shards']['total']} shards.""")


for document in dummy_data:
    response = insert_document(document)
    print_info(response)

Document ID: VGzIk5wBQmTch9OjYwXH is 'created' and is split into 3 shards.
Document ID: VWzIk5wBQmTch9OjYwXO is 'created' and is split into 3 shards.
Document ID: VmzIk5wBQmTch9OjYwXW is 'created' and is split into 3 shards.


# Print mapping

In [8]:
index_mapping = es.indices.get_mapping(index='my_index')
pprint(index_mapping["my_index"]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


# Manual mapping

In [9]:
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index')

mapping = {
    'properties': {
        'created_on': {'type': 'date'},
        'text': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        },
        'title': {
            'type': 'text',
            'fields': {
                'keyword': {
                    'type': 'keyword',
                    'ignore_above': 256
                }
            }
        }
    }
}
es.indices.put_mapping(index="my_index", body=mapping)

index_mapping = es.indices.get_mapping(index='my_index')
pprint(index_mapping["my_index"]["mappings"]["properties"])

{'created_on': {'type': 'date'},
 'text': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
          'type': 'text'},
 'title': {'fields': {'keyword': {'ignore_above': 256, 'type': 'keyword'}},
           'type': 'text'}}


# Common types

###  Binary

In [10]:
es.indices.delete(index='binary_index', ignore_unavailable=True)
es.indices.create(
    index='binary_index',
    mappings={
        "properties": {
            "image_data": {
                "type": "binary"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'binary_index'})

In [11]:
import base64

image_path = "../images/field_data_types_docs.png"
with open(image_path, "rb") as image_file:
    image_bytes = image_file.read()
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")

image_base64[:100]

'iVBORw0KGgoAAAANSUhEUgAAB4AAAAJTCAYAAADpMAvgAAAABHNCSVQICAgIfAhkiAAAABl0RVh0U29mdHdhcmUAZ25vbWUtc2Ny'

In [12]:
document = {
    "image_data": image_base64
}
response = es.index(index='binary_index', body=document)
response

ObjectApiResponse({'_index': 'binary_index', '_id': 'V2zIk5wBQmTch9OjZQU-', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

### Others

In [13]:
es.indices.delete(index='other_common_data_types_index',
                  ignore_unavailable=True)
es.indices.create(
    index='other_common_data_types_index',
    mappings={
        "properties": {
            "book_reference": {
                "type": "keyword"
            },
            "price": {
                "type": "float"
            },
            "publish_date": {
                "type": "date"
            },
            "is_available": {
                "type": "boolean"
            },
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'other_common_data_types_index'})

In [14]:
document = {
    "book_reference": "978-1617294433",
    "price": 44.99,
    "publish_date": "2021-06-30",
    "is_available": True
}
response = es.index(index='other_common_data_types_index', body=document)
response

ObjectApiResponse({'_index': 'other_common_data_types_index', '_id': 'WGzIk5wBQmTch9OjZQXy', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Object types

### Object

In [15]:
es.indices.delete(index='object_index', ignore_unavailable=True)
es.indices.create(
    index='object_index',
    mappings={
        "properties": {
            "author": {
                "properties": {
                    "first_name": {
                        "type": "text"
                    },
                    "last_name": {
                        "type": "text"
                    }
                }
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'object_index'})

In [16]:
document = {
    "author": {
        "first_name": "Imad",
        "last_name": "Saddik"
    }
}
response = es.index(index='object_index', body=document)
response

ObjectApiResponse({'_index': 'object_index', '_id': 'WWzIk5wBQmTch9OjZgXC', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

## Flattened object

In [17]:
es.indices.delete(index='flattened_object_index', ignore_unavailable=True)
es.indices.create(
    index='flattened_object_index',
    mappings={
        "properties": {
            "author": {
                "type": "flattened"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flattened_object_index'})

In [18]:
document = {
    "author": {
        "first_name": "Imad",
        "last_name": "Saddik"
    }
}
response = es.index(index='flattened_object_index', body=document)
response

ObjectApiResponse({'_index': 'flattened_object_index', '_id': 'WmzIk5wBQmTch9OjZwVr', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

## Nested object

In [19]:
es.indices.delete(index='nested_object_index', ignore_unavailable=True)
es.indices.create(
    index='nested_object_index',
    mappings={
        "properties": {
            "user": {
                "type": "nested",
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'nested_object_index'})

In [20]:
documents = [
    {
        "first": "John",
        "last": "Smith"
    },
    {
        "first": "Imad",
        "last": "Saddik"
    }
]
response = es.index(index='nested_object_index', body={"user": documents})
response

ObjectApiResponse({'_index': 'nested_object_index', '_id': 'W2zIk5wBQmTch9OjaAUL', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Text search types

In [21]:
es.indices.delete(index='text_index', ignore_unavailable=True)
es.indices.create(
    index="text_index",
    mappings={
        "properties":{
            "email_body":{
                "type":"text"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'text_index'})

In [22]:
document = {
    "email_body": "Hello, this is a test email."
}
response = es.index(index='text_index', body=document)
response

ObjectApiResponse({'_index': 'text_index', '_id': 'XGzIk5wBQmTch9OjaAXQ', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Completion

In [23]:
es.indices.delete(index='text_completion_index', ignore_unavailable=True)
es.indices.create(
    index='text_completion_index',
    mappings={
        "properties": {
            "suggest": {
                "type": "completion"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'text_completion_index'})

In [24]:
document_1 = {
    "suggest": {
        "input": ["Mars", "Planet"]
    }
}

document_2 = {
    "suggest": {
        "input": ["Andromeda", "Galaxy"]
    }
}

es.index(index='text_completion_index', body=document_1)
es.index(index='text_completion_index', body=document_2)

ObjectApiResponse({'_index': 'text_completion_index', '_id': 'XmzIk5wBQmTch9OjaQV_', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

#  Spatial data types

In [25]:
es.indices.delete(index='geo_point_index', ignore_unavailable=True)
es.indices.create(
    index='geo_point_index',
    mappings={
        "properties": {
            "location": {
                "type": "geo_point"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'geo_point_index'})

In [26]:
document = {
    "text": "Geopoint as an object using GeoJSON format",
    "location": {
        "type": "Point",
        "coordinates": [
            -71.34,
            41.12
        ]
    }
}
response = es.index(index='geo_point_index', body=document)
response

ObjectApiResponse({'_index': 'geo_point_index', '_id': 'X2zIk5wBQmTch9OjagUj', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

# Geo shape

In [27]:
es.indices.delete(index='geo_shape_index', ignore_unavailable=True)
es.indices.create(
    index='geo_shape_index',
    mappings={
        "properties": {
            "location": {
                "type": "geo_shape"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'geo_shape_index'})

In [28]:
document_1 = {
    "location": {
        "type": "LineString",
        "coordinates": [
            [
                -77.03653,
                38.897676
            ],
            [
                -77.009051,
                38.889939
            ]
        ]
    }
}
document_2 = {
    "location": {
        "type": "Polygon",
        "coordinates": [
            [
                [100, 0],
                [101, 0],
                [101, 1],
                [100, 1],
                [100, 0],
            ],
            [
                [100.2, 0.2],
                [100.8, 0.2],
                [100.8, 0.8],
                [100.2, 0.8],
                [100.2, 0.2],
            ]
        ]
    }
}

es.index(index='geo_shape_index', body=document_1)
es.index(index='geo_shape_index', body=document_2)

ObjectApiResponse({'_index': 'geo_shape_index', '_id': 'YWzIk5wBQmTch9OjawUl', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

#  Point

In [29]:
es.indices.delete(index='point_index', ignore_unavailable=True)
es.indices.create(
    index='point_index',
    mappings={
        "properties": {
            "location": {
                "type": "point"
            }
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'point_index'})

In [30]:
document = {
    "location": {
        "type": "Point",
        "coordinates": [
            -71.34,
            41.12
        ]
    }
}

response = es.index(index='point_index', body=document)
response

ObjectApiResponse({'_index': 'point_index', '_id': 'YmzIk5wBQmTch9OjawX2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})

In [31]:
import json
from tqdm import tqdm


document_ids = []
dummy_data = json.load(open("../data/dummy_data.json"))
for document in tqdm(dummy_data, total=len(dummy_data)):
    response = es.index(index='my_index', body=document)
    document_ids.append(response['_id'])

  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 3/3 [00:00<00:00, 129.28it/s]


In [32]:
document_ids

['Y2zIk5wBQmTch9OjbAUV', 'ZGzIk5wBQmTch9OjbAUd', 'ZWzIk5wBQmTch9OjbAUk']

In [33]:
response = es.delete(index='my_index', id=document_ids[0])
pprint(response.body)

{'_id': 'Y2zIk5wBQmTch9OjbAUV',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 3,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'deleted'}


In [34]:
try:
    response = es.delete(index='my_index', id="id")
except Exception as e:
    print(e)

NotFoundError(404, "{'_index': 'my_index', '_id': 'id', '_version': 1, 'result': 'not_found', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1}")


# Get operation
This is an example of a successful operation. If the ID exists in the document, the get operation won't return any errors

In [35]:
response = es.get(index='my_index', id=document_ids[1])
pprint(response.body)

{'_id': 'ZGzIk5wBQmTch9OjbAUd',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 1,
 '_source': {'created_on': '2024-09-24',
             'text': 'Here is another example of a document.',
             'title': 'Sample Title 2'},
 '_version': 1,
 'found': True}


# Count operation

In [36]:
response = es.count(index='my_index')
count = response["count"]

print(f"The number of documents in the index is {count}")

The number of documents in the index is 0


In [37]:
query = {
    "range": {
        "created_on": {
            "gte": "2024-09-24",
            "lte": "2024-09-24",
            "format": "yyyy-MM-dd"
        }
    }
}

response = es.count(index='my_index', query=query)
count = response["count"]

print(f"The number of documents in the index is {count}")

The number of documents in the index is 0


# Exists API

In [38]:

response = es.indices.exists(index='my_index')
response.body

True

In [39]:
response = es.exists(index='my_index', id=document_ids[1])
response.body

True

# Update API

### If documents exists in the index

In [40]:
response = es.update(
    index="my_index",
    id=document_ids[1],
    script={
        "source": "ctx._source.title = params.title",
        "params": {
            "title": "New Title"
        }
    },
)
pprint(response.body)

{'_id': 'ZGzIk5wBQmTch9OjbAUd',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 5,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 2,
 'result': 'updated'}


In [41]:
response = es.get(index='my_index', id=document_ids[1])
pprint(response.body)

{'_id': 'ZGzIk5wBQmTch9OjbAUd',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 5,
 '_source': {'created_on': '2024-09-24',
             'text': 'Here is another example of a document.',
             'title': 'New Title'},
 '_version': 2,
 'found': True}


# Add a new field
To add a new field, you can either use the script argument or the doc argument.

In [42]:
response = es.update(
    index="my_index",
    id=document_ids[1],
    script={
        "source": "ctx._source.new_field = 'dummy_value'",
    },
)
pprint(response.body)

{'_id': 'ZGzIk5wBQmTch9OjbAUd',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 6,
 '_shards': {'failed': 0, 'successful': 1, 'total': 2},
 '_version': 3,
 'result': 'updated'}


In [43]:
response = es.get(index='my_index', id=document_ids[1])
pprint(response.body)

{'_id': 'ZGzIk5wBQmTch9OjbAUd',
 '_index': 'my_index',
 '_primary_term': 1,
 '_seq_no': 6,
 '_source': {'created_on': '2024-09-24',
             'new_field': 'dummy_value',
             'text': 'Here is another example of a document.',
             'title': 'New Title'},
 '_version': 3,
 'found': True}


# Bulk API

In [44]:
es.indices.delete(index='my_index', ignore_unavailable=True)
es.indices.create(index='my_index')

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

In [45]:
response = es.bulk(
    operations=[
        # Action 1
        {
            "index": {
                "_index": "my_index",
                "_id": "1"
            }
        },
        # Source 1
        {
            "title": "Sample Title 1",
            "text": "This is the first sample document text.",
            "created_on": "2024-09-22"
        },
        # Action 2
        {
            "index": {
                "_index": "my_index",
                "_id": "2"
            }
        },
        # Source 2
        {
            "title": "Sample Title 2",
            "text": "Here is another example of a document.",
            "created_on": "2024-09-24"
        },
        # Action 3
        {
            "index": {
                "_index": "my_index",
                "_id": "3"
            }
        },
        # Source 3
        {
            "title": "Sample Title 3",
            "text": "The content of the third document goes here.",
            "created_on": "2024-09-24"
        },
        # Action 4
        {
            "update": {
                "_id": "1",
                "_index": "my_index"
            }
        },
        # Source 4
        {
            "doc": {
                "title": "New Title"
            }
        },
        # Action 5
        {
            "update": {
                "_id": "2",
                "_index": "my_index"
            }
        },
        # Source 5
        {
            "doc": {
                "new_field": "dummy_value"
            }
        },
        # Action 6
        {
            "delete": {
                "_index": "my_index",
                "_id": "3"
            }
        },
    ],
)

pprint(response.body)


{'errors': False,
 'items': [{'index': {'_id': '1',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '2',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '3',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
        

In [46]:
response.body["errors"]

False

# Searching

In [47]:
response = es.search(
    index='my_index',
    body={
        "query": {"match_all": {}}
    }
)

n_hits = response['hits']['total']['value']
print(f"Found {n_hits} documents in index_1")

Found 0 documents in index_1


In [48]:
response['hits']

{'total': {'value': 0, 'relation': 'eq'}, 'max_score': None, 'hits': []}

# Inserting documents

In [49]:
dummy_data=json.load(open("../data/dummy_data_2.json"))
for _ in range(10):
    dummy_data+=dummy_data
len(dummy_data)

5120

In [50]:
op=[]
for doc in dummy_data:
    op.append({'index':{'_index':'my_index'}})
    op.append(doc)
es.bulk(operations=op)

ObjectApiResponse({'errors': False, 'took': 400, 'items': [{'index': {'_index': 'my_index', '_id': 'Z2zNk5wBQmTch9Oj2QU4', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'aGzNk5wBQmTch9Oj2QU4', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'aWzNk5wBQmTch9Oj2QU4', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'amzNk5wBQmTch9Oj2QU4', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'my_index', '_id': 'a2zNk5wBQmTch9Oj2QU4', '_version': 1, 'result': 'created', '_shards': {'tota

# Searching

In [53]:
response = es.search(
    index="my_index",
    body={
        "query": {
            "match_all": {}
        },
        "size": 10,
        "from": 10
    },
)
for hit in response['hits']['hits']:
    print(hit['_source'])

{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'This is an important keyword search result.', 'age': 25, 'price': 100.0}
{'message': 'Another search result with an important keyword.', 'age': 30, 'price': 150.0}
{'message': 'Keyword match in this result as well.', 'age': 40, 'price': 200.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'This is an important keyword search result.', 'age': 25, 'price': 100.0}
{'message': 'Another search result with an important keyword.', 'age': 30, 'price': 150.0}
{'message': 'Keyword match in this result as well.', 'age': 40, 'price': 200.0}


# Timeout
This example shows how to set a timeout for the search query. If the query takes longer than the specified 10s (10 seconds), it will be aborted

In [54]:
response=es.search(
    index="my_index",
    body={
        "query":{
            "match":{
                "message":"search keyword"
            }
        },
        "timeout":"10s"
    },
)
response.body

{'took': 6,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 5120, 'relation': 'eq'},
  'max_score': 0.8941701,
  'hits': [{'_index': 'my_index',
    '_id': 'Z2zNk5wBQmTch9Oj2QU4',
    '_score': 0.8941701,
    '_source': {'message': 'This is an important keyword search result.',
     'age': 25,
     'price': 100.0}},
   {'_index': 'my_index',
    '_id': 'aGzNk5wBQmTch9Oj2QU4',
    '_score': 0.8941701,
    '_source': {'message': 'Another search result with an important keyword.',
     'age': 30,
     'price': 150.0}},
   {'_index': 'my_index',
    '_id': 'bGzNk5wBQmTch9Oj2QU4',
    '_score': 0.8941701,
    '_source': {'message': 'This is an important keyword search result.',
     'age': 25,
     'price': 100.0}},
   {'_index': 'my_index',
    '_id': 'bWzNk5wBQmTch9Oj2QU4',
    '_score': 0.8941701,
    '_source': {'message': 'Another search result with an important keyword.',
     'age': 30,
     'price': 150.0}},
   

#  Aggregation
In this example, we perform an aggregation to calculate the average value of the age field across all documents that match the query. The result of the aggregation is stored in the avg_age key.

In [55]:
response=es.search(
    index="my_index",
    body={
        "query":{
            "match_all":{}
        },
        "aggs":{
            "avg_age":{
                "avg":{
                    "field":"age"
                }
            }
        }
    }
)
avg_age=response['aggregations']['avg_age']['value']
print(f"Average Age: {avg_age}")

Average Age: 31.6


# Combining size, from, timeout, and aggs
Here we combine multiple parameters: we limit the results to 5 documents (size), skip the first 20 documents (from), set a timeout of 5 seconds (timeout), and perform a maximum aggregation (aggs) on the price field. This demonstrates how to use multiple search parameters together

In [57]:
response=es.search(
    index="my_index",
    body={
        "query":{
            "match":{
                "message":"important keyword"
            }
        },
        "aggs":{
            "max_price":{
                "max":{
                    "field":"price"
                }
            }
        },
        "size":5,
        "from":20,
        "timeout":"5s"
    }
)

for hit in response['hits']['hits']:
    print(hit['_source'])
max_price = response['aggregations']['max_price']['value']
print(f"Max Price: {max_price}")

{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
{'message': 'Final document with the important keyword.', 'age': 28, 'price': 180.0}
{'message': 'Important keyword again in this document.', 'age': 35, 'price': 120.0}
Max Price: 200.0


# Inserting documents
Because we are using the dense_vector data type, we need to do the mapping manually.

In [58]:
es.indices.delete(index='my_index',ignore_unavailable=True)
es.indices.create(
    index="my_index",
    mappings={
        "properties":{
            "sides_length":{
                "type":"dense_vector",
                "dims":4
            },
            "shape":{
                "type":"keyword"
            }
        }
    },

)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

#  Valid case
We specified that sides_length should be a vector with 4 values. Since this condition was met, the operation was executed successfully.

In [62]:
from pprint import pprint
response=es.index(
    index="my_index",
    id=1,
    document={
        "shape":"square",
        "sides_length":[5,5,5,5],
    }
)
print(response.body)
print('-'*100)
pprint(es.indices.get_mapping(index='my_index').body)

{'_index': 'my_index', '_id': '1', '_version': 4, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}
----------------------------------------------------------------------------------------------------
{'my_index': {'mappings': {'properties': {'shape': {'type': 'keyword'},
                                          'sides_length': {'dims': 4,
                                                           'index': True,
                                                           'index_options': {'ef_construction': 100,
                                                                             'm': 16,
                                                                             'type': 'int8_hnsw'},
                                                           'similarity': 'cosine',
                                                           'type': 'dense_vector'}}}}}


# Invalid case
In this case, the operation fails because indexing a matrix is not supported with the dense_vector field type.

In [63]:
response=es.index(
    index='my_index',
    id=2,
    document={
        'shape':'square',
        'sides_length':[[5,5],[5,5]],
    }
)
pprint(response.body)

BadRequestError: BadRequestError(400, 'document_parsing_exception', 'Failed to parse object: expecting token of type [VALUE_NUMBER] but found [START_ARRAY]')