Initializing the Elasticsearch API client


In [1]:
from elasticsearch import Elasticsearch

import os
from getpass import getpass

# Connection settings are read from the environment so that no secret is
# stored in the notebook. When ES_PASSWORD is not set, the password is
# requested interactively instead.
es_host = os.environ.get("ES_HOST", "https://localhost:9200")  # Use https for SSL
es_username = os.environ.get("ES_USERNAME", "elastic")
es_password = os.environ.get("ES_PASSWORD") or getpass("Elasticsearch password: ")

# Create an Elasticsearch client instance with authentication
es = Elasticsearch(
    es_host,
    basic_auth=(es_username, es_password),
    # Certificate verification is disabled for local development only;
    # turn it back on for any shared or production cluster.
    verify_certs=False,
)

  _transport = transport_class(


checking the health of the cluster


In [None]:
# Request the cluster health report. The `es` client is expected to exist
# already (it is created in the notebook's first cell).
cluster_health = es.cluster.health()

# Print the whole health response object
print(cluster_health)

listing all nodes


In [None]:
# List the cluster's nodes through the cat API.
# NOTE(review): v=True presumably enables the verbose header row - confirm
# against the cat nodes API documentation.
nodes_info = es.cat.nodes(v=True)
print(nodes_info)

listing all indices


In [None]:
# List indices through the cat API.
# NOTE(review): expand_wildcards="all" presumably also includes hidden and
# closed indices - confirm against the cat indices API documentation.
indices_info = es.cat.indices(v=True, expand_wildcards="all")
print(indices_info)

Deleting and creating indices


In [146]:
# Start from a clean slate: drop 'products' if it is there. A 404 (index not
# found) is tolerated rather than raised.
es.options(ignore_status=[404]).indices.delete(index="products")

# Recreate 'products' with explicit shard and replica counts.
products_index_body = {
    "settings": {
        "number_of_shards": 2,
        "number_of_replicas": 2,
    }
}
es.indices.create(index="products", body=products_index_body)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'products'})

Getting the metadata of an index


In [None]:
# Fetch the 'products' index definition; as the cell's last expression the
# response is displayed as the cell output.
es.indices.get(index="products")

add data to index without id


In [110]:
# No id is passed here, so the new document gets an auto-generated one.
coffee_maker = {"name": "Coffee Maker", "price": 64, "in_stock": 10}
es.index(index="products", body=coffee_maker)

ObjectApiResponse({'_index': 'products', '_id': '2K2TrpIB-9QmpfaGedZP', '_version': 1, 'result': 'created', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

add data to index with id


In [111]:
# Write the document under the explicit id 100: it is created when absent and
# updated otherwise.
toaster = {"name": "Toaster", "price": 49, "in_stock": 4}
es.index(index="products", id=100, body=toaster)

ObjectApiResponse({'_index': 'products', '_id': '100', '_version': 2, 'result': 'updated', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 41, '_primary_term': 1})

get data by id


In [113]:
# Get the document with ID 100 from the 'products' index
doc = es.get(index="products", id=100)

# Print only the stored fields, which the response carries under "_source"
print(doc["_source"])

{'name': 'Toaster', 'price': 49, 'in_stock': 3}


update a document by id


In [112]:
# Partial update of document 100: only the fields listed under "doc" are sent.
stock_change = {"doc": {"in_stock": 3}}
es.update(index="products", id=100, body=stock_change)

ObjectApiResponse({'_index': 'products', '_id': '100', '_version': 3, 'result': 'updated', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 42, '_primary_term': 1})

add fields to a document by id


In [None]:
# Add a 'tags' field to document 100 by sending it as a partial update.
tags_addition = {"doc": {"tags": ["electronics"]}}
es.update(index="products", id=100, body=tags_addition)

scripted updates


In [None]:
# Scripted updates against document 100 in the 'products' index.
# The scripts are listed in order and sent one request at a time.
scripted_updates = [
    # 1. Decrement the 'in_stock' field by 1
    {"source": "ctx._source.in_stock--"},
    # 2. Set the 'in_stock' field to 10
    {"source": "ctx._source.in_stock = 10"},
    # 3. Decrement the 'in_stock' field by a specific quantity
    {
        "source": "ctx._source.in_stock -= params.quantity",
        "params": {"quantity": 4},
    },
    # 4. Perform 'noop' if 'in_stock' is 0, else decrement it by 1
    {
        "source": """
                if (ctx._source.in_stock == 0) {
                    ctx.op = 'noop';
                }
                ctx._source.in_stock--;
            """
    },
    # 5. Decrement 'in_stock' by 1 if it is greater than 0
    {
        "source": """
                if (ctx._source.in_stock > 0) {
                    ctx._source.in_stock--;
                }
            """
    },
    # 6. Delete the document if 'in_stock' is less than 0, else decrement it by 1
    {
        "source": """
                if (ctx._source.in_stock < 0) {
                    ctx.op = 'delete';
                }
                ctx._source.in_stock--;
            """
    },
]

for script in scripted_updates:
    last_update_response = es.update(
        index="products", id=100, body={"script": script}
    )

# Keep the final response as the cell's last expression so it is displayed.
last_update_response

Upserting: if the document with id 101 does not exist, a new document is created from the `upsert` body (as in this example); if it already exists, the script is applied to it instead.


In [109]:
# Upsert for document 101 in 'products': the script increments 'in_stock' by 1
# on an existing document, while the "upsert" fields are used to create the
# document when it does not exist yet.
upsert_request = {
    "script": {"source": "ctx._source.in_stock++"},
    "upsert": {"name": "Blender", "price": 399, "in_stock": 5},
}
es.update(index="products", id=101, body=upsert_request)

ObjectApiResponse({'_index': 'products', '_id': '100', '_version': 1, 'result': 'created', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 40, '_primary_term': 1})

This command replaces the whole document with id 100; it does not modify the existing document in place.


In [133]:
# Index a full document under id 100 in 'products' (create or update).
toaster_replacement = {"name": "Toaster", "price": 79, "in_stock": 4}
es.index(index="products", id=100, body=toaster_replacement)

ObjectApiResponse({'_index': 'products', '_id': '100', '_version': 2, 'result': 'updated', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1})

delete document by id


In [115]:
# Remove document 101 from 'products'; a 404 response is ignored, not raised.
lenient_client = es.options(ignore_status=[404])
lenient_client.delete(index="products", id=101)

ObjectApiResponse({'_index': 'products', '_id': '101', '_version': 9, 'result': 'not_found', '_shards': {'total': 3, 'successful': 1, 'failed': 0}, '_seq_no': 44, '_primary_term': 1})

The next command guards against conflicts when multiple requests modify the same document: first fetch the document and note its `_primary_term` and `_seq_no`, then pass both values with the update so that it only succeeds if the document has not changed in the meantime.


In [None]:
# Read the document first to learn its current primary term and sequence
# number; both are returned alongside the document by the get call.
current = es.get(index="products", id=100)

# Update the document with ID 100 in the 'products' index, only if primary
# term and seq_no still match the values read above. If another request
# changed the document in between, the update is rejected instead of
# overwriting that change.
es.update(
    index="products",
    id=100,
    body={"doc": {"in_stock": 123}},
    if_primary_term=current["_primary_term"],
    if_seq_no=current["_seq_no"],
)

update all matches from the products index


In [116]:
# Decrement 'in_stock' on every document in 'products': the match_all query
# selects all documents and the script is applied to each of them.
decrement_all = {
    "script": {"source": "ctx._source.in_stock--"},
    "query": {"match_all": {}},
}
es.update_by_query(index="products", body=decrement_all)

ObjectApiResponse({'took': 244, 'timed_out': False, 'total': 7, 'updated': 7, 'deleted': 0, 'batches': 1, 'version_conflicts': 0, 'noops': 0, 'retries': {'bulk': 0, 'search': 0}, 'throttled_millis': 0, 'requests_per_second': -1.0, 'throttled_until_millis': 0, 'failures': []})

delete by query


In [117]:
# Delete every document in 'products' that the match_all query selects.
# NOTE(review): "conflicts": "proceed" presumably keeps going past version
# conflicts instead of aborting - confirm against the delete-by-query docs.
delete_all = {"conflicts": "proceed", "query": {"match_all": {}}}
es.delete_by_query(index="products", body=delete_all)

ObjectApiResponse({'took': 36, 'timed_out': False, 'total': 7, 'deleted': 7, 'batches': 1, 'version_conflicts': 0, 'noops': 0, 'retries': {'bulk': 0, 'search': 0}, 'throttled_millis': 0, 'requests_per_second': -1.0, 'throttled_until_millis': 0, 'failures': []})

bulk

In [151]:
from elasticsearch import Elasticsearch, helpers

# Ensure the 'products' index exists
es.options(ignore_status=[400, 404]).indices.create(
    index="products"
)  # Ignores the error if the index already exists

# Define the actions for the bulk request. Each entry names its operation in
# "_op_type"; the delete action carries no "_source".
actions = [
    {
        "_op_type": "index",
        "_index": "products",
        "_id": 200,
        "_source": {"name": "Espresso Machine", "price": 199, "in_stock": 5},
    },
    {
        "_op_type": "create",
        "_index": "products",
        "_id": 201,
        "_source": {"name": "Milk Frother", "price": 149, "in_stock": 14},
    },
    {
        "_op_type": "update",
        "_index": "products",
        "_id": 201,
        "_source": {"doc": {"price": 129}},
    },
    {"_op_type": "delete", "_index": "products", "_id": 200},
]

# Execute the bulk request
try:
    # Without stats_only, the second return value is a list of error items,
    # so its length is reported rather than the list itself.
    success, failed = helpers.bulk(es, actions)
    print(f"Bulk operation response: {success} succeeded, {len(failed)} failed")
except helpers.BulkIndexError as e:
    # Item-level failures: show the summary and then every failed item, so
    # the cause of each failure is visible.
    print(f"An error occurred: {e}")
    for item_error in e.errors:
        print(item_error)
except Exception as e:
    print(f"An error occurred: {e}")

Bulk operation response: 4 succeeded, [] failed


Reading bulk data from a JSON file into Elasticsearch

In [2]:
import json
from elasticsearch import Elasticsearch, helpers

# The Elasticsearch client `es` created in the first cell is reused here.


# Operations recognised on an action line, in the order they are checked.
_BULK_OP_TYPES = ("index", "create", "update", "delete")

# Keys showing that an update payload is already a complete update body.
_UPDATE_BODY_KEYS = frozenset({"doc", "script", "upsert"})


def parse_bulk_file(filename):
    """Convert a bulk (newline-delimited JSON) file into ``helpers.bulk`` actions.

    The file is read as a sequence of action lines. ``index``, ``create`` and
    ``update`` actions are followed by one document line; a ``delete`` action
    stands alone. Blank lines are skipped.

    Args:
        filename: Path of the bulk file, read as UTF-8.

    Returns:
        A list of dicts with ``_op_type`` and ``_index`` (defaulting to
        ``"products"``), plus ``_id`` and ``_source`` where applicable.
        ``_id`` is left out for ``index``/``create`` actions that do not
        specify one.

    Raises:
        ValueError: If the file ends where a document line is expected, or an
            action line names no supported operation.
        KeyError: If an ``update`` or ``delete`` action has no ``_id``.
        json.JSONDecodeError: If a line is not valid JSON.
    """
    actions = []
    with open(filename, "r", encoding="utf-8") as file:
        # Dropping blank lines up front means an empty line in the middle of
        # the file no longer looks like the end of the input.
        stripped = (line.strip() for line in file)
        lines = (line for line in stripped if line)

        for action_line in lines:
            action = json.loads(action_line)
            op_type = next((op for op in _BULK_OP_TYPES if op in action), None)
            if op_type is None:
                raise ValueError(f"Unsupported bulk action: {action_line}")

            meta = action[op_type]
            entry = {
                "_op_type": op_type,
                # Default to 'products' if the action names no index
                "_index": meta.get("_index", "products"),
            }
            if op_type in ("update", "delete"):
                entry["_id"] = meta["_id"]  # these operations need a target id
            elif "_id" in meta:
                entry["_id"] = meta["_id"]

            # A delete action has no document line; consuming one here would
            # shift every following action/document pair by one line.
            if op_type != "delete":
                document_line = next(lines, None)
                if document_line is None:
                    raise ValueError("Unexpected end of file")
                document = json.loads(document_line)

                if op_type == "update":
                    already_update_body = isinstance(document, dict) and bool(
                        _UPDATE_BODY_KEYS & document.keys()
                    )
                    # Wrap bare partial documents only; a payload that is
                    # already an update body must not be nested a second time.
                    if not already_update_body:
                        document = {"doc": document}

                entry["_source"] = document

            actions.append(entry)
    return actions


try:
    # Ensure the 'products' index exists. A 400 response ("index already
    # exists") is ignored through per-request options, the same mechanism the
    # other cells use, instead of the deprecated `ignore=` keyword.
    es.options(ignore_status=[400]).indices.create(index="products")

    # Parse the bulk file
    actions = parse_bulk_file("products-bulk.json")

    # Perform the bulk operation; with stats_only=True the second return
    # value is a failure count rather than a list of error items.
    success, failed = helpers.bulk(es, actions, stats_only=True)

    # Print the response
    print(
        f"Bulk operation completed. Successful operations: {success}, Failed operations: {failed}"
    )

except json.JSONDecodeError as e:
    # A line of the bulk file was not valid JSON
    print(f"Error parsing JSON: {e}")

except Exception as e:
    print(f"An unexpected error occurred: {e}")

  es.indices.create(


Bulk operation completed. Successful operations: 1000, Failed operations: 0
