### Elasticsearch Index Creation Example

In [1]:
from pprint import pprint
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")
client_info = es.info()
print("Elasticsearch Client Information:")
pprint(client_info)


Elasticsearch Client Information:
ObjectApiResponse({'name': '82c3cca05ad1', 'cluster_name': 'docker-cluster', 'cluster_uuid': 'PuXwAaOSSK-vVPeGsKp1QA', 'version': {'number': '8.15.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '1a77947f34deddb41af25e6f0ddb8e830159c179', 'build_date': '2024-08-05T10:05:34.233336849Z', 'build_snapshot': False, 'lucene_version': '9.11.1', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})


In [2]:
es.indices.create(index="my_index")

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [my_index/4WJnnuAGTQW-IhjtDEPjwg] already exists')

### cerate an index with settings and mappings

# shard
#### shard is a basic unit of storage in Elasticsearch. It is a self-contained index that can be stored on any node in the cluster. Each shard is a Lucene index, and it can be thought of as a single index that can be searched independently.

# replica
#### A replica is a copy of a shard. It is used to provide redundancy and high availability
#### in case a node fails. Each shard can have multiple replicas, and each replica is stored
#### on a different node in the cluster. This allows Elasticsearch to continue serving requests even if a node fails.

In [3]:
es.indices.delete(index="my_index")
es.indices.create(index="my_index",
                  settings={
                      "number_of_shards": 3,
                      "number_of_replicas": 2,}
                  )


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

### create an index with mappings

In [4]:
document={
    "title": "Elasticsearch Basics",
    "content": "This is a basic introduction to Elasticsearch.",
    "author": "Abhishek Nangare",
    "published_date": "2023-10-01",
    "tags": ["elasticsearch", "search", "indexing"]

}

responce=es.index(index="my_index",body=document)
print("Document indexed successfully:")
pprint(responce)

Document indexed successfully:
ObjectApiResponse({'_index': 'my_index', '_id': 'GMjzdZgB54fw1oOJy4Xa', '_version': 1, 'result': 'created', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1})


In [20]:
print(responce["result"])

created


In [5]:
print(responce["_shards"])

{'total': 3, 'successful': 1, 'failed': 0}


### insert multiple documents

In [6]:

import json

dummy_data=json.load(open("Data/data.json"))

In [7]:
print(dummy_data)

[{'id': 1, 'name': 'Alice Johnson', 'role': 'Software Engineer', 'age': 29, 'location': 'Bangalore', 'skills': ['Python', 'Elasticsearch', 'Docker'], 'joining_date': '2022-04-10', 'active': True}, {'id': 2, 'name': 'Bob Smith', 'role': 'Data Scientist', 'age': 35, 'location': 'Mumbai', 'skills': ['Python', 'Pandas', 'TensorFlow'], 'joining_date': '2021-08-22', 'active': True}, {'id': 3, 'name': 'Carol Williams', 'role': 'DevOps Engineer', 'age': 32, 'location': 'Delhi', 'skills': ['Kubernetes', 'AWS', 'Terraform'], 'joining_date': '2020-01-15', 'active': False}, {'id': 4, 'name': 'David Brown', 'role': 'Frontend Developer', 'age': 26, 'location': 'Pune', 'skills': ['React', 'JavaScript', 'CSS'], 'joining_date': '2023-01-05', 'active': True}]


In [8]:
def insert_data(data):
    responses = []
    for doc in data:
        response = es.index(index="my_index", body=doc)
        print(f"Document indexed successfully: {response['result']}")
        responses.append(response)
    return responses

def print_shard_info(response):
    print("Shard Information:")
    print(f"Total Shards: {response['_shards']['total']}")
    print(f"Successful Shards: {response['_shards']['successful']}")
    print(f"Failed Shards: {response['_shards']['failed']}")

responses = insert_data(dummy_data)
for response in responses:
    print_shard_info(response)


Document indexed successfully: created
Document indexed successfully: created
Document indexed successfully: created
Document indexed successfully: created
Shard Information:
Total Shards: 3
Successful Shards: 1
Failed Shards: 0
Shard Information:
Total Shards: 3
Successful Shards: 1
Failed Shards: 0
Shard Information:
Total Shards: 3
Successful Shards: 1
Failed Shards: 0
Shard Information:
Total Shards: 3
Successful Shards: 1
Failed Shards: 0


### Mapping


In [9]:
from elasticsearch import Elasticsearch
from pprint import pprint

# Connect to Elasticsearch
es = Elasticsearch("http://localhost:9200")

# Define the mapping
mapping = {
    "mappings": {
        "properties": {
            "created_on": {
                "type": "date"
            },
            "text": {
                "type": "text",
                "fields": {
                    "keyword": {
                        "type": "keyword"
                    }
                }
            },
            "title": {
                "type": "text"
            }
        }
    }
}

# Delete the index if it exists
es.indices.delete(index="my_index", ignore_unavailable=True)

# Create the index with the mapping
es.indices.create(index="my_index", body=mapping)

# Fetch and print the mapping to confirm
index_mapping = es.indices.get_mapping(index="my_index")
pprint(index_mapping)


ObjectApiResponse({'my_index': {'mappings': {'properties': {'created_on': {'type': 'date'}, 'text': {'type': 'text', 'fields': {'keyword': {'type': 'keyword'}}}, 'title': {'type': 'text'}}}}})
