In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")
client_info = es.info()
print("Elasticsearch Client Information:")
pprint(client_info)


Elasticsearch Client Information:
ObjectApiResponse({'name': '82c3cca05ad1', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'PuXwAaOSSK-vVPeGsKp1QA', 'version': {'number': '8.15.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179', 'build_date': '2024-08-05T10:05:34.233336849Z', 'build_snapshot': False, 'lucene_version': '9.11.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})




## Filed Data Types

### Comman Data Types
#### Binary

##### use _source (i.e document body ) to get the data back

❌ Not usable for full-text search

❌ Not usable for aggregations

❌ Not usable for sorting

✅ What binary fields can be used for
Storage only — like a mini file server or blob store
Retrieving the original file (e.g., download from frontend)

In [3]:
# Delete the index if it exists
if es.indices.exists(index="binary_index"):
    es.indices.delete(index="binary_index")

# Create the index with binary field mapping
es.indices.create(
    index="binary_index",
    body={
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "mappings": {
            "properties": {
                "file_content": {
                    "type": "binary"
                }
            }
        }
    }
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'binary_index'})

In [4]:
import base64
# Sample binary data (e.g., an image or a file)
binary_data = base64.b64encode(b"This is a sample binary data.").decode('utf-8')
# Document to be indexed
document = {
    "file_content": binary_data
}
# Index the document
response = es.index(index="binary_index", body=document)
print("Document indexed successfully:")
pprint(response)


Document indexed successfully:
ObjectApiResponse({'_index': 'binary_index', '_id': 'HcgFdpgB54fw1oOJQYVl', '_version': 1, 'result': 'created', '_shards': {'total': 1, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})


## other data Types

In [5]:
es.indices.delete(index="other_data_types_index", ignore_unavailable=True)
es.indices.create(index="other_data_types_index", mappings=
                  {
        "properties": {
            "boolean_field": {"type": "boolean"},
            "date_field": {"type": "date"},
            "geo_point_field": {"type": "geo_point"},
            "keyword_field": {"type": "keyword"},
            "long_field": {"type": "long"},
            "text_field": {"type": "text"}
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'other_data_types_index'})

In [6]:
document={
    "boolean_field": True,
    "date_field": "2023-10-01",
    "geo_point_field": {"lat": 40.7128, "lon": -74.0060},
    "keyword_field": "example_keyword",
    "long_field": 1234567890123456789,
    "text_field": "This is an example of a text field."
}

response = es.index(index="other_data_types_index", body=document)

In [7]:
print("Document indexed successfully:")
pprint(response)

Document indexed successfully:
ObjectApiResponse({'_index': 'other_data_types_index', '_id': 'HsgKdpgB54fw1oOJM4Uy', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})


### object data type
#### An object data type is used to store structured data in a JSON-like format. It allows you to index and search nested fields within a document.
#### It is useful for representing complex data structures, such as user profiles, product details, or any other hierarchical data.
#### It can contain multiple fields, each with its own data type, and supports nested objects.

In [8]:
es.indices.delete(index="object_index", ignore_unavailable=True)
es.indices.create(index="object_index",
                  mappings={
    "properties": {
        "user": {
            "type": "object",
            "properties": {
                "name": {"type": "text"},
                "email": {"type": "keyword"},
                "address": {
                    "type": "object",
                    "properties": {
                        "street": {"type": "text"},
                        "city": {"type": "text"},
                        "state": {"type": "keyword"},
                        "zip": {"type": "keyword"}
                    }
                  }
            }
        }
    }
                  }
                  )

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'object_index'})

In [11]:
document={
    "user": {
        "name": "John Doe",
        "email": "abhisheknangare@gmail.com",
        "address":{
            "street": "123 Main St",
            "city": "Springfield",
            "state": "IL",
            "zip": "62701"
        }
    }
}
response = es.index(index="object_index", body=document)
print("Document indexed successfully:")
pprint(response)

Document indexed successfully:
ObjectApiResponse({'_index': 'object_index', '_id': 'IMgSdpgB54fw1oOJbIW7', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})


### flattened data type
#### The flattened data type is used to index and search large JSON objects with many fields. It
#### allows you to index fields with dynamic names without defining a fixed schema.
#### It is useful for scenarios where you have a large number of fields with varying names, such
#### as log data, event data, or any other unstructured data.

In [13]:
es.indices.delete(index="flatten_object_index", ignore_unavailable=True)
es.indices.create(index="flatten_object_index",
                  mappings={
    "properties": {
        "event_data": {
            "type": "flattened"
        }
    }
                  }
                  )


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flatten_object_index'})

In [14]:
document={
    "event_data": {
        "event_type": "user_login",
        "user_id": "12345",
        "timestamp": "2023-10-01T12:00:00Z",
        "location": "New York",
        "device": "mobile"
    }
}
response = es.index(index="flatten_object_index", body=document)
print("Document indexed successfully:")
pprint(response)

Document indexed successfully:
ObjectApiResponse({'_index': 'flatten_object_index', '_id': 'IcgUdpgB54fw1oOJKoW2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})


### Nested data type
#### The nested data type is used to index and search arrays of objects. It allows you to
#### perform complex queries on nested fields, such as filtering, sorting, and aggregating.
#### It is useful for scenarios where you have arrays of objects with multiple fields, such as
#### product reviews, user comments, or any other hierarchical data.


In [15]:
es.indices.delete(index="nested_data_type", ignore_unavailable=True)
es.indices.create(index="nested_data_type",
                  mappings={
    "properties": {
        "product": {
            "type": "object",
            "properties": {
                "name": {"type": "text"},
                "reviews": {
                    "type": "nested",
                    "properties": {
                        "user": {"type": "text"},
                        "rating": {"type": "integer"},
                        "comment": {"type": "text"}
                    }
                }
            }
        }
    }
                  }
                  )

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'nested_data_type'})

In [16]:
document={
    "product": {
        "name": "Sample Product",
        "reviews": [
            {
                "user": "Alice",
                "rating": 5,
                "comment": "Great product!"
            },
            {
                "user": "Bob",
                "rating": 4,
                "comment": "Good value for money."
            }
        ]
    }
}
response = es.index(index="nested_data_type", body=document)
print("Document indexed successfully:")
pprint(response)

Document indexed successfully:
ObjectApiResponse({'_index': 'nested_data_type', '_id': 'IsgVdpgB54fw1oOJrYXI', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})


### Completion data type
#### The completion data type is used to implement autocomplete or suggestion features. It allows you to index
#### and search for partial matches in text fields, making it suitable for search-as-you-type functionality
#### It is useful for scenarios where you want to provide real-time suggestions based on user input, such as search
#### queries, product names, or any other text-based input.


In [18]:
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")

# Delete the index if it exists
es.indices.delete(index="completion_data_type", ignore_unavailable=True)

# Create the index with a 'completion' field
es.indices.create(
    index="completion_data_type",
    mappings={
        "properties": {
            "suggestion": {
                "type": "completion",
                "analyzer": "simple",
                "search_analyzer": "simple"
            }
        }
    }
)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'completion_data_type'})

In [19]:
# Index movie titles
es.index(index="completion_data_type", id=1, document={
    "suggestion": {
        "input": ["Spider-Man"]
    }
})

es.index(index="completion_data_type", id=2, document={
    "suggestion": {
        "input": ["Spirited Away"]
    }
})

es.index(index="completion_data_type", id=3, document={
    "suggestion": {
        "input": ["Split"]
    }
})


ObjectApiResponse({'_index': 'completion_data_type', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1})

In [20]:
response = es.search(index="completion_data_type", body={
    "suggest": {
        "movie-suggest": {
            "prefix": "spi",
            "completion": {
                "field": "suggestion"
            }
        }
    }
})

# Print suggestions
suggestions = response["suggest"]["movie-suggest"][0]["options"]
for opt in suggestions:
    print(opt["_source"])


{'suggestion': {'input': ['Spider-Man']}}
{'suggestion': {'input': ['Spirited Away']}}
